dataFrame.hpp
Go to the documentation of this file.
1 #ifndef JULIAN_DATAFRAME_HPP
2 #define JULIAN_DATAFRAME_HPP
3 
4 #include <iostream>
5 #include <vector>
6 #include <string>
7 #include <fstream>
8 #include <map>
9 #include <dates/date.hpp>
10 #include <dates/tenor.hpp>
11 #include <dates/timeUnit.hpp>
12 #include <utils/SmartPointer.hpp>
13 #include <utils/objectFactory.hpp>
14 #include <utils/valueFactory.hpp>
15 namespace julian {
16 
17  class DataEntryClerk;
18 
33  class DataFrame {
34  public:
35  DataFrame(){};
36  DataFrame(std::string file_name, char delimiter, bool first_row_is_column_names, int primery_key = 0);
37 
38  std::string operator()(const int col, const int row);
39  std::string operator()(const std::string col, const int row);
40  std::string operator()(const std::string col, const std::string row);
41 
42  std::vector<std::string> operator()(int col);
43  std::vector<std::string> operator()(std::string col);
44 
45  double getDouble(const int col, const int row);
46  double getDouble(const std::string col, const int row);
47  double getDouble(const std::string col, const std::string row);
48 
49  Date getDate(const int col, const int row, Date::Format fmt);
50  Date getDate(const std::string col, const int row, Date::Format fmt);
51  Date getDate(const std::string col, const std::string row, Date::Format fmt);
52 
53  std::vector<std::string> getColumnNames() const;
54  std::vector<std::string> getPrimaryKeys() const;
55  std::map<std::string, std::string> getRow(int row) const;
56 
57  void append(const DataEntryClerk&);
58  void append(const DataFrame&);
59 
60  void print(int n_rows = -1);
61  void printToCsv(std::string file_name, char delimiter = ';');
62 
63  int getNumberOfRows() const;
64  int getNumberOfColumns() const;
65 
66  template<class T>
67  SmartPointer<T> getObject(const int col, const int row);
68  template<class T>
69  SmartPointer<T> getObject(const std::string col, const int row);
70  template<class T>
71  SmartPointer<T> getObject(const std::string col, const std::string row);
72  template<class T>
73  T getValue(const int col, const int row);
74  template<class T>
75  T getValue(const std::string col, const int row);
76  template<class T>
77  T getValue(const std::string col, const std::string row);
78 
79  template<class T>
80  DataFrame filter(int col,T filter);
81  template<class T>
82  DataFrame filter(std::string col,T filter);
83 
84  friend std::ostream& operator<<(std::ostream&, DataFrame&);
85  private:
86  void parseFile(std::string file_name, char delimiter);
87  void createColumnNames(bool first_row);
88  // void printHLine(std::vector<int> );
89 
90  std::vector<std::vector<std::string> > data_;
91  std::map<std::string,int> column_names_;
92  std::map<std::string,int> primary_key_;
93  int ncols_ = 0;
94  int nrows_ = 0;
95  };
96 
102  template<class T>
103  SmartPointer<T> DataFrame::getObject(const int col, const int row) {
104  return ObjectFactory<T>::instance().getObject(data_[row-1][col-1]);
105  }
106 
113  template<class T>
114  SmartPointer<T> DataFrame::getObject(const std::string col, const int row) {
115  BOOST_ASSERT_MSG(!column_names_.empty(), "Column names not defined");
116  return ObjectFactory<T>::instance().getObject(data_[row-1][column_names_[col]-1]);
117  }
118 
125  template<class T>
126  SmartPointer<T> DataFrame::getObject(const std::string col, const std::string row) {
127  BOOST_ASSERT_MSG(!primary_key_.empty(), "Primary key not defined");
128  BOOST_ASSERT_MSG(!column_names_.empty(), "Column names not defined");
130  }
131 
137  template<class T>
138  T DataFrame::getValue(const int col, const int row) {
139  return ValueFactory<T>::instance().getValue(data_[row-1][col-1]);
140  }
141 
148  template<class T>
149  T DataFrame::getValue(const std::string col, const int row) {
150  BOOST_ASSERT_MSG(!column_names_.empty(), "Column names not defined");
151  return ValueFactory<T>::instance().getValue(data_[row-1][column_names_[col]-1]);
152  }
153 
160  template<class T>
161  T DataFrame::getValue(const std::string col, const std::string row) {
162  BOOST_ASSERT_MSG(!primary_key_.empty(), "Primary key not defined");
163  BOOST_ASSERT_MSG(!column_names_.empty(), "Column names not defined");
165  }
166 
172  template<class T>
174  DataFrame filtered;
175  for (int r = 1; r <= nrows_; r++) {
176  if (filter(data_[r-1][col-1])) {
177  filtered.data_.push_back(data_[r-1]);
178  }
179  }
180 
181  filtered.column_names_ = column_names_;
182  filtered.ncols_ = ncols_;
183  filtered.nrows_ = filtered.data_.size();
184 
185  return filtered;
186  }
187 
194  template<class T>
195  DataFrame DataFrame::filter(std::string col,T filter) {
196  BOOST_ASSERT_MSG(!column_names_.empty(), "Column names not defined");
197  DataFrame filtered;
198  for (int r = 1; r <= nrows_; r++) {
199  if (filter(data_[r-1][column_names_[col] - 1])) {
200  filtered.data_.push_back(data_[r-1]);
201  }
202  }
203 
204  filtered.column_names_ = column_names_;
205  filtered.ncols_ = ncols_;
206  filtered.nrows_ = filtered.data_.size();
207 
208  return filtered;
209 ;
210  }
218  public:
219 
223  void add(std::string, std::string);
224  void add(std::string, double);
225  void add(std::string, int);
226  void add(std::string, Date);
227  void add(std::string, Tenor);
228 
233  template<typename T>
234  void add(const std::map<std::string, T>& input) {
235  for (auto item : input)
236  add(item.first, item.second);
237  }
238 
239  friend class DataFrame;
240  private:
241  std::map<std::string, std::string> data_;
242  };
243 
244 } // namespace julian
245 
246 
247 #endif
void createColumnNames(bool first_row)
Creates column names based on the first row of the CSV file.
Definition: dataFrame.cpp:70
Format
Date formats.
Definition: date.hpp:31
std::map< std::string, int > primary_key_
Maps row name to row number.
Definition: dataFrame.hpp:92
File contains the template of a deep-copying smart pointer.
std::map< std::string, int > column_names_
Maps column name to column number.
Definition: dataFrame.hpp:91
void append(const DataEntryClerk &)
adds data stored by DataEntryClerk
Definition: dataFrame.cpp:207
std::vector< std::string > getColumnNames() const
Returns column names as vector of strings.
Definition: dataFrame.cpp:335
Date getDate(const int col, const int row, Date::Format fmt)
Reads cell in c-th column and r-th row.
Definition: dataFrame.cpp:176
File contains definition of Factory pattern.
int ncols_
number of columns
Definition: dataFrame.hpp:93
std::map< std::string, std::string > data_
Map that stores inputs provided using the method DataEntryClerk::add.
Definition: dataFrame.hpp:241
Definition: cadHoliday.cpp:3
friend std::ostream & operator<<(std::ostream &, DataFrame &)
Overloads stream operator.
Definition: dataFrame.cpp:387
DataEntryClerk()
Default constructor.
Definition: dataFrame.hpp:222
void add(const std::map< std::string, T > &input)
add a map to data
Definition: dataFrame.hpp:234
int getNumberOfColumns() const
Returns number of columns.
Definition: dataFrame.cpp:357
Class used to provide data to julian::DataFrame.
Definition: dataFrame.hpp:217
Template of a deep-copying smart pointer.
Definition: smartPointer.hpp:14
double getDouble(const int col, const int row)
Reads cell in c-th column and r-th row.
Definition: dataFrame.cpp:146
int getNumberOfRows() const
Returns number of rows.
Definition: dataFrame.cpp:351
File contains definition of date class.
int nrows_
number of rows
Definition: dataFrame.hpp:94
SmartPointer< T > getObject(std::string name)
Returns a SmartPointer pointing to a new object whose type depends on the string provided ...
Definition: objectFactory.hpp:75
std::vector< std::string > getPrimaryKeys() const
Returns primary keys as vector of strings.
Definition: dataFrame.cpp:325
File contains definition of Factory pattern.
static ObjectFactory & instance()
returns reference to instance of singleton factory
Definition: objectFactory.hpp:87
SmartPointer< T > getObject(const int col, const int row)
Reads cell in c-th column and r-th row.
Definition: dataFrame.hpp:103
void parseFile(std::string file_name, char delimiter)
Parses the CSV file; the result is saved in DataFrame::data_.
Definition: dataFrame.cpp:37
T getValue(const int col, const int row)
Reads cell in c-th column and r-th row.
Definition: dataFrame.hpp:138
Class implements a date object.
Definition: date.hpp:27
File contains definition of tenor class.
std::map< std::string, std::string > getRow(int row) const
Returns row in form of std::map.
Definition: dataFrame.cpp:363
Class implements a tenor object.
Definition: tenor.hpp:23
DataFrame filter(int col, T filter)
Filters the data frame based on the data held in a column.
Definition: dataFrame.hpp:173
void print(int n_rows=-1)
Prints DataFrame.
Definition: dataFrame.cpp:237
T getValue(std::string name)
returns enumeration value
Definition: valueFactory.hpp:62
static ValueFactory & instance()
returns reference to instance of singleton factory
Definition: valueFactory.hpp:52
void printToCsv(std::string file_name, char delimiter= ';')
prints DataFrame to csv file
Definition: dataFrame.cpp:294
std::string operator()(const int col, const int row)
Reads cell in c-th column and r-th row.
Definition: dataFrame.cpp:92
std::vector< std::vector< std::string > > data_
Holds data as 2d table of strings.
Definition: dataFrame.hpp:90
Class used to handle data read from csv files.
Definition: dataFrame.hpp:33
File contains time units and other useful enumerations.