MeteoIODoc 20251207.a703fe45
Environmental timeseries pre-processing
Loading...
Searching...
No Matches
ARIMAutils.h
Go to the documentation of this file.
1// SPDX-License-Identifier: LGPL-3.0-or-later
2/***********************************************************************************/
3/* Copyright 2013 WSL Institute for Snow and Avalanche Research SLF-DAVOS */
4/***********************************************************************************/
5/* This file is part of MeteoIO.
6 MeteoIO is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MeteoIO is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with MeteoIO. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef UTILS_H
20#define UTILS_H
21
22#include <cassert>
23#include <meteoio/MeteoIO.h>
24#include <vector>
25#include <iomanip>
26
// NOTE: these three constants are defined at global scope, before namespace mio is opened,
// and being `static const` every translation unit including this header gets its own copy.

// Tolerance for date comparisons. Presumably the tolerance used by requal() — TODO confirm in ARIMAutils.cc.
27static const double DATE_TOLERANCE = 1e-6;
// Presumably the minimum number of data points needed before an ARIMA model is fitted — TODO confirm in the callers.
28static const int MIN_ARIMA_DATA_POINTS = 8;
// Upper bound on the extrapolation length, counted in samples (48 samples correspond to two days of hourly data).
29static const int MAX_ARIMA_EXTRAPOLATION = 48; // is two days with hourly data
30
31namespace mio {
32
33 namespace ARIMAutils {
34
// Associates every ObjectiveFunction value with a std::string; the map itself is defined in ARIMAutils.cc.
// NOTE(review): the ObjectiveFunction enum declared just above is not visible in this listing.
40 extern const std::map<ObjectiveFunction, std::string> ObjectiveFunctionMap;
41
// Associates every OptimizationMethod value with a std::string; the map itself is defined in ARIMAutils.cc.
// NOTE(review): the OptimizationMethod enum declared just above is not visible in this listing.
52 extern const std::map<OptimizationMethod, std::string> OptimizationMethodMap;
53
54 // Holds the coefficients needed to normalize and denormalize one time series. An instance cannot be
55 // shared between several time series.
// NOTE(review): the class header, the Mode enum and the default constructor are not visible in this listing.
57 public:
// Build from the series `data`, keeping the default mode (Nothing).
// Presumably mean/std/min/max are computed from `data` here — confirm in ARIMAutils.cc.
64 Normalization(const std::vector<double>& data);
// Same as above, but with the normalization mode set to `new_mode` (presumably — confirm in ARIMAutils.cc).
65 Normalization(const std::vector<double>& data, const Mode& new_mode);
66
// Select the normalization mode used by subsequent calls.
67 void setMode(const Mode& new_mode) {mode = new_mode;}
// Return the current normalization mode. NOTE(review): not declared const although it does not modify the object.
68 Mode getMode() {return mode;}
// Return a normalized copy of `data`; the input vector is taken by const reference and left untouched.
69 std::vector<double> normalize(const std::vector<double>& data);
// Return a denormalized copy of `data`; presumably the inverse of normalize() for the same mode — confirm in ARIMAutils.cc.
70 std::vector<double> denormalize(const std::vector<double>& data);
71
72 private:
// Cached coefficients; they have no in-class initializer, so the constructors are responsible for setting them.
73 double mean;
74 double std;
75 double min;
76 double max;
// Active normalization mode; defaults to Nothing.
77 Mode mode = Nothing;
78 };
79
80 // Slice a vector from start to start+N and return the result as a new vector
// (presumably N elements, i.e. the end index is exclusive — confirm in ARIMAutils.cc).
81 std::vector<double> slice(const std::vector<double> &vec, const size_t& start, const size_t& N);
82
83 // Slice a vector from start to the end of the vector and return the result as a new vector.
84 std::vector<double> slice(const std::vector<double> &vec, const size_t& start);
85
86 // Counterpart of numpy's np.arange for C++, returning doubles
// (presumably N consecutive values beginning at `start` — confirm in ARIMAutils.cc).
87 std::vector<double> arange(const size_t& start, const size_t& N);
88
/**
 * @brief Return the smallest or the largest element of a vector.
 * @details The elements are compared with operator< (minimum) or operator> (maximum).
 * An empty vector is rejected with an exception instead of an assert, so the check is
 * still active when the library is built with NDEBUG.
 * @param vec vector to scan, must contain at least one element
 * @param findMin true to search for the minimum, false to search for the maximum
 * @return a copy of the extreme element
 * @throws std::invalid_argument if vec is empty
 */
template <typename T> T findMinMax(const std::vector<T> &vec, bool findMin) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot find the min/max of an empty vector");
    }

    T extremeValue = vec[0];
    for (const auto &value : vec) {
        // strict comparisons: on ties the first occurrence is kept
        const bool isBetter = findMin ? (value < extremeValue) : (value > extremeValue);
        if (isBetter) {
            extremeValue = value;
        }
    }
    return extremeValue;
}
100
101 // calculate the mean of a vector
102 double calcVecMean(const std::vector<double> &vec);
103
104 // calculate the standard deviation of a vector
// NOTE(review): whether this is the population or the sample standard deviation is decided in ARIMAutils.cc.
105 double stdDev(const std::vector<double> &vec);
106
/**
 * @brief Reverse the order of the elements of a vector, in place.
 * @details The indices are size_t, so vectors with more elements than an int can
 * represent are handled correctly.
 * @param vec vector to reverse, must contain at least one element
 * @throws std::invalid_argument if vec is empty
 */
template <typename T> void reverseVector(std::vector<T> &vec) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot reverse an empty vector");
    }

    // vec is not empty here, so size() - 1 cannot wrap around; the loop stops as
    // soon as the two indices meet, so `back` is never decremented below zero
    size_t front = 0;
    size_t back = vec.size() - 1;
    while (front < back) {
        std::swap(vec[front], vec[back]);
        ++front;
        --back;
    }
}
121
/**
 * @brief Return a reversed copy of a vector, leaving the input untouched.
 * @details The copy is built directly from the reverse iterators of the input, which
 * avoids copying first and swapping afterwards and does not rely on int indices.
 * @param vec vector to reverse, must contain at least one element
 * @return a new vector holding the elements of vec in reverse order
 * @throws std::invalid_argument if vec is empty
 */
template <typename T> std::vector<T> reverseVectorReturn(const std::vector<T> &vec) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot reverse an empty vector");
    }
    return std::vector<T>(vec.rbegin(), vec.rend());
}
139
140 // converts a vector of MeteoData to a vector of doubles, the parameter to extract being selected by its name
141 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const std::string &paramname);
142
143 // converts a vector of MeteoData to a vector of doubles, the parameter to extract being selected by its index
144 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const size_t &paramindex);
145
146 // helper to parse the direction argument for interpolarima
// NOTE(review): the accepted values of `direction` and the meaning of `forward`, `gap_loc` and `length`
// are only visible in ARIMAutils.cc — check there before calling.
147 std::vector<double> decideDirection(const std::vector<double> &data, const std::string &direction, bool forward, size_t gap_loc,
148 size_t length);
149
150 // A struct to cache information about a gap: its first and last index into the MeteoData vector,
// the dates found at those indices, and a sampling rate.
// NOTE(review): the constructor, part of reset() and the declarations of startDate, endDate and
// sampling_rate are not visible in this listing.
151 struct ARIMA_GAP {
// Widen the gap so that it includes idx: the start is moved if idx lies before it,
// the end is moved if idx lies after it.
153 void extend(const size_t &idx, const std::vector<MeteoData> &vecM) {
154 if (idx < start)
155 setStart(idx, vecM);
156 if (idx > end)
157 setEnd(idx, vecM);
158 }
// Set the start index and take the start date from vecM[idx].
// An idx outside of vecM is silently ignored and leaves the gap unchanged.
159 void setStart(const size_t &idx, const std::vector<MeteoData> &vecM) {
160 if (idx >= vecM.size())
161 return;
162 start = idx;
163 startDate = vecM[idx].date;
164 }
// Set the end index and take the end date from vecM[idx].
// An idx outside of vecM is silently ignored and leaves the gap unchanged.
165 void setEnd(const size_t &idx, const std::vector<MeteoData> &vecM) {
166 if (idx >= vecM.size())
167 return;
168 end = idx;
169 endDate = vecM[idx].date;
170 }
// Reset both dates to a default-constructed Date.
// NOTE(review): two lines of this body are hidden in this listing (presumably resetting start and end).
171 void reset() {
174 startDate = Date();
175 endDate = Date();
176 }
// Indices of the first and the last point of the gap.
177 size_t start, end;
// True when the time span between startDate and endDate, as returned by getJulian(true),
// multiplied by sampling_rate is at least 2. Presumably this means the gap covers at least two
// sampling intervals, assuming sampling_rate is expressed per day — TODO confirm.
// NOTE(review): not declared const although it does not modify the object.
180 bool isGap() {
181 return (endDate - startDate).getJulian(true) * sampling_rate >= 2;
182 } // TODO: should i always do arima prediction?
// Multi-line, human-readable description of the gap: both dates in ISO format and the sampling rate.
// The start and end indices are not part of the output.
183 std::string toString() const {
184 std::ostringstream os;
185 os << "ARIMA_GAP: {\n"
186 << "\tStart Date: " << startDate.toString(Date::ISO) << ",\n"
187 << "\tEnd Date: " << endDate.toString(Date::ISO) << ",\n"
188 << "\tSampling Rate: " << sampling_rate << ",\n"
189 << "}";
190 return os.str();
191 }
192 };
193
194 // Search backward from pos for a valid point. The result is a size_t, not a bool:
// presumably the index of the point that was found, with an out-of-range value such as
// IOUtils::npos when there is none — TODO confirm in ARIMAutils.cc.
195 size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
196 const Date &resampling_date, const double &i_window_size);
197
198 // Search forward from pos for a valid point. The result is a size_t, not a bool:
// presumably the index of the point that was found, with an out-of-range value such as
// IOUtils::npos when there is none — TODO confirm in ARIMAutils.cc.
199 size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
200 const Date &resampling_date, const double &i_window_size, const size_t &indexP1);
201
// Compute the gap around pos. last_gap, indexP1, indexP2, before_window, after_window, window_size,
// data_start_date and data_end_date are all taken by non-const reference, so any of them may be
// modified by the call — see ARIMAutils.cc for what each one receives.
202 void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
203 const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window,
204 double &window_size, Date &data_start_date, Date &data_end_date);
205
206 // returns true when two dates are roughly equal, given a tolerance level
// (presumably DATE_TOLERANCE — TODO confirm in ARIMAutils.cc)
207 bool requal(const Date &date1, const Date &date2);
208
209 // returns the most often occurring value in a vector
210 double mostLikelyValue(const std::vector<double> &vec);
211
212 // compute the most often occurring sampling rate rounded to 1e-6
// NOTE(review): both dates and the whole vecM are passed by value, so the vector is copied on every call.
213 double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector<MeteoData> vecM);
214
// NOTE(review): the generated documentation lists the ARIMAutils.cc definition of this function as
// `static` with a `const Date &data_end_date` last parameter, which does not match this declaration
// (non-const `Date &`) — verify that this declaration actually has a definition.
215 Date findFirstDateWithSamplingRate(const std::vector<MeteoData> &vecM, const double sampling_rate, const Date &data_start_date,
216 Date &data_end_date);
// Returns a start date derived from data_start_date, vecM and last_gap; data_start_date is taken
// by value, so the caller's variable is not modified. The adjustment rule lives in ARIMAutils.cc.
217 Date adjustStartDate(const std::vector<MeteoData> &vecM, const ARIMA_GAP &last_gap, Date data_start_date,
218 const Date &data_end_date);
219
/**
 * @brief Internal helper: write several vectors side by side, as a table, to a stream.
 * @details One left-aligned column of width 10 per vector, headed "Vector1", "Vector2", ...
 * followed by a line of dashes. There are as many rows as the longest vector has elements;
 * vectors that are too short are padded with "NaN". The stream is not flushed.
 * @param os stream receiving the table (its current formatting state applies to the values)
 * @param vecs vectors to write, one column each
 */
template <typename T> void writeVectorsTable(std::ostream &os, const std::vector<std::vector<T>> &vecs) {
    const int columnWidth = 10;

    size_t maxSize = 0;
    for (const auto &vec : vecs) {
        maxSize = std::max(maxSize, vec.size());
    }

    // Headers
    for (size_t i = 0; i < vecs.size(); i++) {
        os << std::left << std::setw(columnWidth) << "Vector" + std::to_string(i + 1);
    }
    os << '\n';
    os << std::string(vecs.size() * static_cast<size_t>(columnWidth), '-') << '\n';

    // Rows: element i of every vector, or "NaN" once a vector is exhausted
    for (size_t i = 0; i < maxSize; i++) {
        for (const auto &vec : vecs) {
            os << std::left << std::setw(columnWidth);
            if (i < vec.size()) {
                os << vec[i];
            } else {
                os << "NaN";
            }
        }
        os << '\n';
    }
}

/**
 * @brief Format several vectors side by side as a table and return it as a string.
 * @param vecs vectors to format, one column each
 * @return the table, every line (including the last one) terminated by a newline
 */
template <typename T> std::string convertVectorsToString(const std::vector<std::vector<T>> &vecs) {
    std::ostringstream oss;
    writeVectorsTable(oss, vecs);
    return oss.str();
}

/**
 * @brief Print several vectors side by side as a table on std::cout.
 * @details Produces the same layout as convertVectorsToString(); std::cout is flushed once
 * the whole table has been written.
 * @param vecs vectors to print, one column each
 */
template <typename T> void printVectors(const std::vector<std::vector<T>> &vecs) {
    writeVectorsTable(std::cout, vecs);
    std::cout.flush();
}
273
274 template <typename T> void printVectors(const std::vector<Date> &vec1, const std::vector<T> &vec2) {
275 size_t maxSize = std::max(vec1.size(), vec2.size());
276
277 // Print headers
278 std::cout << std::left << std::setw(30) << "Date1"
279 << "| Date2" << std::endl;
280 std::cout << "--------------------------------------------------" << std::endl;
281
282 for (size_t i = 0; i < maxSize; i++) {
283 // Print date from vec1 or "NaN" if out of range
284 if (i < vec1.size()) {
285 std::cout << std::left << std::setw(30) << vec1[i].toString(Date::ISO) << "| ";
286 } else {
287 std::cout << std::left << std::setw(30) << "NaN"
288 << "| ";
289 }
290
291 // Print date from vec2 or "NaN" if out of range
292 if (i < vec2.size()) {
293 std::cout << vec2[i] << std::endl;
294 } else {
295 std::cout << "NaN" << std::endl;
296 }
297 }
298 }
299
300 } // namespace ARIMAutils
301} // namespace mio
302#endif // UTILS_H
static const int MAX_ARIMA_EXTRAPOLATION
Definition ARIMAutils.h:29
static const int MIN_ARIMA_DATA_POINTS
Definition ARIMAutils.h:28
static const double DATE_TOLERANCE
Definition ARIMAutils.h:27
Definition ARIMAutils.h:56
Mode getMode()
Definition ARIMAutils.h:68
void setMode(const Mode &new_mode)
Definition ARIMAutils.h:67
Normalization()
Definition ARIMAutils.cc:47
std::vector< double > normalize(const std::vector< double > &data)
Definition ARIMAutils.cc:53
std::vector< double > denormalize(const std::vector< double > &data)
Definition ARIMAutils.cc:80
Mode
Definition ARIMAutils.h:58
@ ZScore
Definition ARIMAutils.h:60
@ MinMax
Definition ARIMAutils.h:59
@ Nothing
Definition ARIMAutils.h:61
A class to handle timestamps. This class handles conversion between different time display formats (I...
Definition Date.h:87
const std::string toString(const FORMATS &type, const bool &gmt=false) const
Return a nicely formated string.
Definition Date.cc:1176
@ ISO
ISO 8601 extended format combined date: YYYY-MM-DDTHH:mm:SS.sss (fields might be dropped,...
Definition Date.h:91
size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size, const size_t &indexP1)
Definition ARIMAutils.cc:243
double stdDev(const std::vector< double > &vec)
Definition ARIMAutils.cc:144
const std::map< ObjectiveFunction, std::string > ObjectiveFunctionMap
Definition ARIMAutils.cc:31
std::vector< double > arange(const size_t &start, const size_t &N)
Definition ARIMAutils.cc:122
void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window, double &window_size, Date &data_start_date, Date &data_end_date)
Definition ARIMAutils.cc:329
std::vector< double > decideDirection(const std::vector< double > &data, const std::string &direction, bool forward, size_t gap_loc, size_t length)
Definition ARIMAutils.cc:177
std::vector< double > slice(const std::vector< double > &vec, const size_t &start, const size_t &N)
Definition ARIMAutils.cc:106
static Date findFirstDateWithSamplingRate(const std::vector< MeteoData > &vecM, const double sampling_rate, const Date &data_start_date, const Date &data_end_date)
Definition ARIMAutils.cc:397
double calcVecMean(const std::vector< double > &vec)
Definition ARIMAutils.cc:131
size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size)
Definition ARIMAutils.cc:195
std::vector< double > toVector(const std::vector< MeteoData > &vecM, const std::string &paramname)
Definition ARIMAutils.cc:158
std::string convertVectorsToString(const std::vector< std::vector< T > > &vecs)
Definition ARIMAutils.h:220
Date adjustStartDate(const std::vector< MeteoData > &vecM, const ARIMA_GAP &last_gap, Date data_start_date, const Date &data_end_date)
Definition ARIMAutils.cc:415
OptimizationMethod
Definition ARIMAutils.h:42
@ Conjugate_Gradient
Definition ARIMAutils.h:47
@ LBFGS
Definition ARIMAutils.h:49
@ Newton_Trust_Region_Hook_Step
Definition ARIMAutils.h:45
@ Newton_Trust_Region_Double_Dog_Leg
Definition ARIMAutils.h:46
@ BFGS_MTM
Definition ARIMAutils.h:50
@ Nelder_Mead
Definition ARIMAutils.h:43
@ Newton_Line_Search
Definition ARIMAutils.h:44
@ BFGS
Definition ARIMAutils.h:48
ObjectiveFunction
Definition ARIMAutils.h:35
@ MLE
Definition ARIMAutils.h:37
@ CSS_MLE
Definition ARIMAutils.h:36
@ CSS
Definition ARIMAutils.h:38
const std::map< OptimizationMethod, std::string > OptimizationMethodMap
Definition ARIMAutils.cc:36
std::vector< T > reverseVectorReturn(const std::vector< T > &vec)
Definition ARIMAutils.h:123
void reverseVector(std::vector< T > &vec)
Definition ARIMAutils.h:108
double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector< MeteoData > vecM)
Definition ARIMAutils.cc:381
T findMinMax(const std::vector< T > &vec, bool findMin)
Definition ARIMAutils.h:89
void printVectors(const std::vector< std::vector< T > > &vecs)
Definition ARIMAutils.h:248
double mostLikelyValue(const std::vector< double > &vec)
Definition ARIMAutils.cc:366
bool requal(const Date &date1, const Date &date2)
Definition ARIMAutils.cc:300
const size_t npos
npos is the out-of-range value
Definition IOUtils.h:81
Definition Config.cc:34
static double forward(double x, const std::vector< double > &params, EditingRegFill::RegressionType regtype)
Definition DataEditingAlgorithms.cc:1172
Definition ARIMAutils.h:151
ARIMA_GAP()
Definition ARIMAutils.h:152
void setEnd(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition ARIMAutils.h:165
Date endDate
Definition ARIMAutils.h:178
bool isGap()
Definition ARIMAutils.h:180
void extend(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition ARIMAutils.h:153
std::string toString() const
Definition ARIMAutils.h:183
void setStart(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition ARIMAutils.h:159
size_t end
Definition ARIMAutils.h:177
Date startDate
Definition ARIMAutils.h:178
void reset()
Definition ARIMAutils.h:171
double sampling_rate
Definition ARIMAutils.h:179
size_t start
Definition ARIMAutils.h:177