MeteoIODoc 20241221.207bde49
CsvParams.h
Go to the documentation of this file.
1// SPDX-License-Identifier: LGPL-3.0-or-later
2/***********************************************************************************/
3/* Copyright 2023 WSL Institute for Snow and Avalanche Research SLF-DAVOS */
4/***********************************************************************************/
5/* This file is part of MeteoIO.
6 MeteoIO is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MeteoIO is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with MeteoIO. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef CSVPARAMS_H
20#define CSVPARAMS_H
21
22#include <meteoio/IOUtils.h>
23#include <meteoio/IOInterface.h> //for LinesRange
24
25#include <string>
26#include <vector>
27
28namespace mio {
29
32 public:
33 CsvDateTime(const double& tz_in);
34
35 //this matches the formats that are supported in Date
36 typedef enum DECIMAL_DATE_FORMATS {
42 UNIX
44
45 void updateMaxCol();
46 int getFixedYear(const double& i_jdn);
47 int getFixedYear(const int& i_month);
48 int getFixedHour();
49 bool isSet() const;
50
51 void setDateTimeSpec(const std::string& datetime_spec);
52 void setDateSpec(const std::string& date_spec);
53 void setTimeSpec(const std::string& time_spec);
54 void setDecimalDateType(std::string i_decimaldate_type);
55 void setFixedYear(const int& i_year, const bool& i_auto_wrap);
56 void setFixedHour(const int& i_hour);
57 bool parseField(const std::string& fieldname, const size_t &ii);
58 Date parseDate(const std::vector<std::string>& vecFields);
59 std::string toString() const;
60
61 size_t max_dt_col;
62 bool auto_wrap;
63
64 private:
65 static bool parseDateComponent(const std::vector<std::string>& vecFields, const size_t& idx, int& value);
66 static bool parseDateComponent(const std::vector<std::string>& vecFields, const size_t& idx, double& value);
67 Date parseDate(const std::string& date_time_str) const;
68 Date parseDate(const std::string& value_str, const CsvDateTime::decimal_date_formats& format) const;
69 bool parseDate(const std::string& date_str, float args[3]) const;
70 bool parseTime(const std::string& time_str, float args[3], double& tz) const;
71 double parseTime(const std::string& time_str, double& tz) const;
72
73 static int castToInt(const float &val);
74 static void checkSpecString(const std::string& spec_string, const size_t& nr_params);
75
76 std::vector<size_t> datetime_idx;
77 std::vector<size_t> date_idx;
78 std::vector<size_t> time_idx;
79 std::string datetime_format, date_format, time_format;
80 decimal_date_formats decimal_date_type;
81 double csv_tz;
82 //ntime is a field that contains the time as a number, for example 0920 (its column is stored in idx_ntime)
83 size_t idx_decimal_date, idx_date_time_str, idx_date_str, idx_time_str, idx_year, idx_jdn, idx_month, idx_day, idx_ntime, idx_hours, idx_minutes, idx_seconds;
84 static const int cutoff_year = 40;
85 int year_cst, hour_cst;
86 bool has_tz;
87 bool dt_as_decimal;
88 bool dt_2digits_year;
89 };
90
92 public:
93 CsvParameters(const double& tz_in);
94
95 void setHeaderRepeatMk(const std::string& marker) {header_repeat_mk=marker;}
96 void setDelimiter(const std::string& delim);
97 void setHeaderDelimiter(const std::string& delim);
98 void setSkipFields(const std::string& skipFieldSpecs, const bool& negate);
99 void setUnits(const std::string& csv_units, const char& delim=' ');
100 void setLinesExclusions(const std::vector< LinesRange >& linesSpecs) {linesExclusions=linesSpecs;}
101 void setNodata(const std::string& nodata_markers);
102 void setPurgeChars(const std::string& chars_to_purge);
103 void setFile(const std::string& i_file_and_path, const std::vector<std::string>& vecMetaSpec, const std::string& filename_spec, const std::string& station_idx="");
104 void setLocation(const Coords i_location, const std::string& i_name, const std::string& i_id) {location=i_location; name=i_name; id=i_id;}
105 void setSlope(const double& i_slope, const double& i_azimuth) {slope=i_slope; azi=i_azimuth;}
106 void setDateTimeSpecs(const std::string &datetime_spec, const std::string &date_spec, const std::string &time_spec, const std::string &decimaldate_type);
107 void setFixedYear(const int& i_year, const bool& i_auto_wrap) {date_cols.setFixedYear(i_year, i_auto_wrap);}
108 void setFixedHour(const int& i_hour) {date_cols.setFixedHour(i_hour);}
109
110 std::string toString() const;
111 std::string getFilename() const {return file_and_path;}
112 StationData getStation() const;
113 Date getDate(const std::vector<std::string>& vecFields) {return date_cols.parseDate(vecFields);}
114 bool excludeLine(const size_t& linenr, bool& hasExclusions);
115 bool skipField(const size_t& fieldnr) const;
116 bool hasPurgeChars() const {return !purgeCharsSet.empty();}
117 void purgeChars(std::string &line) {IOUtils::removeChars(line, purgeCharsSet);}
118 bool isNodata(const std::string& value) const;
119
120 std::vector<std::string> csv_fields;
121 std::vector<double> units_offset, units_multiplier;
122 std::vector<double> field_offset, field_multiplier;
123
125 size_t ID_col;
131
132 private:
133 static std::string identifyField(const std::string& fieldname);
134 void assignMetadataVariable(const std::string& field_type, const std::string& field_val, double &lat, double &lon, double &easting, double &northing);
135 void parseFileName(std::string filename, const std::string& filename_spec, double &lat, double &lon, double &easting, double &northing);
136 void parseFields(const std::vector<std::string>& headerFields, std::vector<std::string>& fieldNames);
137 static std::multimap< size_t, std::pair<size_t, std::string> > parseHeadersSpecs(const std::vector<std::string>& vecMetaSpec);
138 void parseSpecialHeaders(const std::string& line, const size_t& linenr, const std::multimap< size_t, std::pair<size_t, std::string> >& meta_spec, double &lat, double &lon, double &easting, double &northing);
139
140 CsvDateTime date_cols;
141 Coords location;
142 std::set<std::string> nodata;
143 std::set<size_t> skip_fields;
144 std::set<char> purgeCharsSet;
145 std::vector< LinesRange > linesExclusions;
146 std::string file_and_path, single_field;
147 std::string name, id;
148 double slope, azi;
149 size_t exclusion_idx;
150 size_t exclusion_last_linenr;
151 size_t last_allowed_field;
152};
153
154} //namespace
155#endif
A class to handle geographic coordinate systems. This class offers an easy way to transparently conve...
Definition: Coords.h:83
class to contain date and time parsing information
Definition: CsvParams.h:31
void setFixedHour(const int &i_hour)
Definition: CsvParams.cc:247
CsvDateTime(const double &tz_in)
Definition: CsvParams.cc:35
void setTimeSpec(const std::string &time_spec)
Definition: CsvParams.cc:188
void setDateTimeSpec(const std::string &datetime_spec)
Definition: CsvParams.cc:114
bool auto_wrap
if true, dates >= October will be assumed to belong to (year_cst-1) until a date < October is encount...
Definition: CsvParams.h:62
decimal_date_formats
Definition: CsvParams.h:36
@ RFC868
RFC 868 date.
Definition: CsvParams.h:41
@ MJULIAN
Modified Julian date.
Definition: CsvParams.h:39
@ JULIAN
Standard Julian date.
Definition: CsvParams.h:38
@ MATLAB
Matlab date.
Definition: CsvParams.h:40
@ EXCEL
Excel date.
Definition: CsvParams.h:37
@ UNIX
Unix date.
Definition: CsvParams.h:42
void setFixedYear(const int &i_year, const bool &i_auto_wrap)
Definition: CsvParams.cc:241
int getFixedHour()
Definition: CsvParams.cc:72
void setDateSpec(const std::string &date_spec)
Definition: CsvParams.cc:157
bool isSet() const
Definition: CsvParams.cc:77
Date parseDate(const std::vector< std::string > &vecFields)
Definition: CsvParams.cc:437
size_t max_dt_col
Maximum index of a date/time field (for optimized parsing)
Definition: CsvParams.h:61
std::string toString() const
Definition: CsvParams.cc:533
void setDecimalDateType(std::string i_decimaldate_type)
Definition: CsvParams.cc:220
void updateMaxCol()
Definition: CsvParams.cc:42
bool parseField(const std::string &fieldname, const size_t &ii)
Definition: CsvParams.cc:253
int getFixedYear(const double &i_jdn)
Definition: CsvParams.cc:58
Definition: CsvParams.h:91
size_t header_lines
Definition: CsvParams.h:126
void setLinesExclusions(const std::vector< LinesRange > &linesSpecs)
Definition: CsvParams.h:100
StationData getStation() const
Definition: CsvParams.cc:1106
void setLocation(const Coords i_location, const std::string &i_name, const std::string &i_id)
Definition: CsvParams.h:104
char header_delim
Definition: CsvParams.h:127
size_t columns_headers
Definition: CsvParams.h:126
std::vector< double > field_offset
Definition: CsvParams.h:122
bool asc_order
Definition: CsvParams.h:129
void setFixedHour(const int &i_hour)
Definition: CsvParams.h:108
void setHeaderRepeatMk(const std::string &marker)
Definition: CsvParams.h:95
void setSkipFields(const std::string &skipFieldSpecs, const bool &negate)
Definition: CsvParams.cc:598
void setPurgeChars(const std::string &chars_to_purge)
Definition: CsvParams.cc:847
bool hasPurgeChars() const
Definition: CsvParams.h:116
void purgeChars(std::string &line)
Definition: CsvParams.h:117
char csv_delim
Definition: CsvParams.h:127
size_t units_headers
Definition: CsvParams.h:126
std::string fields_postfix
Definition: CsvParams.h:124
std::vector< double > units_offset
Definition: CsvParams.h:121
Date getDate(const std::vector< std::string > &vecFields)
Definition: CsvParams.h:113
bool excludeLine(const size_t &linenr, bool &hasExclusions)
Definition: CsvParams.cc:869
void setFile(const std::string &i_file_and_path, const std::vector< std::string > &vecMetaSpec, const std::string &filename_spec, const std::string &station_idx="")
Definition: CsvParams.cc:988
bool isNodata(const std::string &value) const
Definition: CsvParams.cc:900
size_t ID_col
Definition: CsvParams.h:125
std::vector< double > field_multiplier
offsets and multipliers to apply to each field
Definition: CsvParams.h:122
bool number_fields
include a column number in the field names as well as an optional field_postfix (this helps when debu...
Definition: CsvParams.h:130
char comments_mk
Definition: CsvParams.h:128
void setFixedYear(const int &i_year, const bool &i_auto_wrap)
Definition: CsvParams.h:107
bool header_repeat_at_start
Definition: CsvParams.h:129
char eoln
Definition: CsvParams.h:128
void setNodata(const std::string &nodata_markers)
Definition: CsvParams.cc:832
CsvParameters(const double &tz_in)
Definition: CsvParams.cc:562
void setHeaderDelimiter(const std::string &delim)
Definition: CsvParams.cc:632
void setUnits(const std::string &csv_units, const char &delim=' ')
Definition: CsvParams.cc:795
void setSlope(const double &i_slope, const double &i_azimuth)
Definition: CsvParams.h:105
std::string getFilename() const
Definition: CsvParams.h:111
void setDateTimeSpecs(const std::string &datetime_spec, const std::string &date_spec, const std::string &time_spec, const std::string &decimaldate_type)
Definition: CsvParams.cc:908
void setDelimiter(const std::string &delim)
Definition: CsvParams.cc:620
std::string header_repeat_mk
Definition: CsvParams.h:124
bool skipField(const size_t &fieldnr) const
Definition: CsvParams.cc:892
std::string toString() const
Definition: CsvParams.cc:568
std::string filter_ID
Definition: CsvParams.h:124
std::vector< std::string > csv_fields
the user provided list of field names
Definition: CsvParams.h:120
std::vector< double > units_multiplier
offsets and multipliers to convert the data to SI
Definition: CsvParams.h:121
A class to handle timestamps. This class handles conversion between different time display formats (I...
Definition: Date.h:87
A class to represent meteo stations with attributes like longitude, latitude, etc.
Definition: StationData.h:41
void removeChars(std::string &line, const std::set< char > &specialChars)
Removes any character present in the provided set from the given line.
Definition: IOUtils.cc:241
Definition: Config.cc:31