MeteoIODoc 20250312.660e6d76
ARIMAutils.h
Go to the documentation of this file.
1// SPDX-License-Identifier: LGPL-3.0-or-later
2/***********************************************************************************/
3/* Copyright 2013 WSL Institute for Snow and Avalanche Research SLF-DAVOS */
4/***********************************************************************************/
5/* This file is part of MeteoIO.
6 MeteoIO is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MeteoIO is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with MeteoIO. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef UTILS_H
20#define UTILS_H
21
#include <algorithm>
#include <cassert>
#include <meteoio/MeteoIO.h>
#include <stdexcept>
#include <vector>
25
// Compile-time constants shared by the ARIMA helpers. They are constexpr so that
// they can also be used in constant expressions (static_assert, array bounds, ...).

// tolerance applied when comparing dates (NOTE(review): presumably the one used by requal() - confirm in ARIMAutils.cc)
static constexpr double DATE_TOLERANCE = 1e-6;
// NOTE(review): presumably the minimum number of data points required to fit an ARIMA model - confirm against callers
static constexpr int MIN_ARIMA_DATA_POINTS = 8;
static constexpr int MAX_ARIMA_EXTRAPOLATION = 48; // is two days with hourly data
29
30namespace mio {
31
32 namespace ARIMAutils {
33
38 };
39 extern const std::map<ObjectiveFunction, std::string> ObjectiveFunctionMap;
40
50 };
51 extern const std::map<OptimizationMethod, std::string> OptimizationMethodMap;
52
53 // a struct to hold the coefficients for normalization and denormalization of a time series, cannot be used
54 // for multiple time series
56 public:
57 enum Mode {
61 };
63 Normalization(const std::vector<double>& data);
64 Normalization(const std::vector<double>& data, const Mode& new_mode);
65
66 void setMode(const Mode& new_mode) {mode = new_mode;}
67 Mode getMode() {return mode;}
68 std::vector<double> normalize(const std::vector<double>& data);
69 std::vector<double> denormalize(const std::vector<double>& data);
70
71 private:
72 double mean;
73 double std;
74 double min;
75 double max;
76 Mode mode = Nothing;
77 };
78
79 // slice a vector from start to start+N
80 std::vector<double> slice(const std::vector<double> &vec, const size_t& start, const size_t& N);
81
82 // slice a vector from start to end
83 std::vector<double> slice(const std::vector<double> &vec, const size_t& start);
84
85 // np.arange for c++
86 std::vector<double> arange(const size_t& start, const size_t& N);
87
// Return the smallest (findMin == true) or the largest (findMin == false) element of vec.
// When several elements compare equal to the extreme value, the first one is returned.
// Throws std::invalid_argument if vec is empty: an assert alone would be compiled out
// with NDEBUG and the function would then read vec[0] of an empty vector.
template <typename T> T findMinMax(const std::vector<T> &vec, bool findMin) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot find the minimum or maximum of an empty vector");
    }
    return findMin ? *std::min_element(vec.begin(), vec.end()) : *std::max_element(vec.begin(), vec.end());
}
99
100 // calculate the mean of a vector
101 double calcVecMean(const std::vector<double> &vec);
102
103 // calculate the standard deviation of a vector
104 double stdDev(const std::vector<double> &vec);
105
// Reverse a vector in place.
// Throws std::invalid_argument if vec is empty.
// std::reverse works on iterators, so there is no narrowing of the size to int
// (the hand-written index loop would break for vectors larger than INT_MAX).
template <typename T> void reverseVector(std::vector<T> &vec) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot reverse an empty vector");
    }
    std::reverse(vec.begin(), vec.end());
}
120
// Return a reversed copy of vec; vec itself is left untouched.
// Throws std::invalid_argument if vec is empty.
// The copy is built directly from the reverse iterators, which avoids both the
// copy-then-swap pass and the narrowing of the size to int.
template <typename T> std::vector<T> reverseVectorReturn(const std::vector<T> &vec) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot reverse an empty vector");
    }
    return std::vector<T>(vec.rbegin(), vec.rend());
}
138
139 // converts a vector of MeteoData to a vector of doubles
140 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const std::string &paramname);
141
142 // converts a vector of MeteoData to a vector of doubles
143 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const size_t &paramindex);
144
145 // helper to parse direction argument for interpolarima
146 std::vector<double> decideDirection(const std::vector<double> &data, const std::string &direction, bool forward, size_t gap_loc,
147 size_t length);
148
149 // a struct to cache information about a gap
150 struct ARIMA_GAP {
152 void extend(const size_t &idx, const std::vector<MeteoData> &vecM) {
153 if (idx < start)
154 setStart(idx, vecM);
155 if (idx > end)
156 setEnd(idx, vecM);
157 }
158 void setStart(const size_t &idx, const std::vector<MeteoData> &vecM) {
159 if (idx >= vecM.size())
160 return;
161 start = idx;
162 startDate = vecM[idx].date;
163 }
164 void setEnd(const size_t &idx, const std::vector<MeteoData> &vecM) {
165 if (idx >= vecM.size())
166 return;
167 end = idx;
168 endDate = vecM[idx].date;
169 }
170 void reset() {
173 startDate = Date();
174 endDate = Date();
175 }
176 size_t start, end;
179 bool isGap() {
180 return (endDate - startDate).getJulian(true) * sampling_rate >= 2;
181 } // TODO: should i always do arima prediction?
182 std::string toString() const {
183 std::ostringstream os;
184 os << "ARIMA_GAP: {\n"
185 << "\tStart Date: " << startDate.toString(Date::ISO) << ",\n"
186 << "\tEnd Date: " << endDate.toString(Date::ISO) << ",\n"
187 << "\tSampling Rate: " << sampling_rate << ",\n"
188 << "}";
189 return os.str();
190 }
191 };
192
193 // search backward from pos for a valid point; returns a size_t, not a bool (presumably the index found, or IOUtils::npos if none - confirm in ARIMAutils.cc)
194 size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
195 const Date &resampling_date, const double &i_window_size);
196
197 // search forward from pos for a valid point; returns a size_t, not a bool (presumably the index found, or IOUtils::npos if none - confirm in ARIMAutils.cc)
198 size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
199 const Date &resampling_date, const double &i_window_size, const size_t &indexP1);
200
201 void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
202 const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window,
203 double &window_size, Date &data_start_date, Date &data_end_date);
204
205 // roughly equal between two dates, given a tolerance level
206 bool requal(const Date &date1, const Date &date2);
207
208 // returns the most often occurring value in a vector
209 double mostLikelyValue(const std::vector<double> &vec);
210
211 // compute the most often occurring sampling rate rounded to 1e-6
212 double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector<MeteoData> vecM);
213
214 Date findFirstDateWithSamplingRate(const std::vector<MeteoData> &vecM, const double sampling_rate, const Date &data_start_date,
215 Date &data_end_date);
216 Date adjustStartDate(const std::vector<MeteoData> &vecM, const ARIMA_GAP &last_gap, Date data_start_date,
217 const Date &data_end_date);
218
// Format several vectors as the columns of a text table and return it as a string.
// Every column is left aligned and 10 characters wide; the header row reads
// "Vector1", "Vector2", ... and is followed by a dashed separator line.
// The table has as many rows as the longest vector, shorter ones are padded with "NaN".
template <typename T> std::string convertVectorsToString(const std::vector<std::vector<T>> &vecs) {
    const size_t n_columns = vecs.size();

    // the longest column defines the number of rows
    size_t n_rows = 0;
    for (size_t col = 0; col < n_columns; ++col) {
        if (vecs[col].size() > n_rows) {
            n_rows = vecs[col].size();
        }
    }

    std::ostringstream table;

    // header row and separator
    for (size_t col = 0; col < n_columns; ++col) {
        const std::string title = "Vector" + std::to_string(col + 1);
        table << std::left << std::setw(10) << title;
    }
    table << '\n';
    table << std::string(n_columns * 10, '-') << '\n';

    // one line per row, one cell per column
    for (size_t row = 0; row < n_rows; ++row) {
        for (size_t col = 0; col < n_columns; ++col) {
            const std::vector<T> &column = vecs[col];
            table << std::left << std::setw(10);
            if (row < column.size()) {
                table << column[row];
            } else {
                table << "NaN"; // this column is shorter than the longest one
            }
        }
        table << '\n';
    }
    return table.str();
}
246
// Print several vectors to std::cout as the columns of a text table.
// Every column is left aligned and 10 characters wide; the header row reads
// "Vector1", "Vector2", ... and is followed by a dashed separator line.
// The table has as many rows as the longest vector, shorter ones are padded with "NaN".
template <typename T> void printVectors(const std::vector<std::vector<T>> &vecs) {
    const size_t n_columns = vecs.size();

    // the longest column defines the number of rows
    size_t n_rows = 0;
    for (size_t col = 0; col < n_columns; ++col) {
        n_rows = std::max(n_rows, vecs[col].size());
    }

    // header row and separator
    for (size_t col = 0; col < n_columns; ++col) {
        const std::string title = "Vector" + std::to_string(col + 1);
        std::cout << std::left << std::setw(10) << title;
    }
    std::cout << std::endl;
    std::cout << std::string(n_columns * 10, '-') << std::endl;

    // one line per row, one cell per column
    for (size_t row = 0; row < n_rows; ++row) {
        for (size_t col = 0; col < n_columns; ++col) {
            const std::vector<T> &column = vecs[col];
            std::cout << std::left << std::setw(10);
            if (row < column.size()) {
                std::cout << column[row];
            } else {
                std::cout << "NaN"; // this column is shorter than the longest one
            }
        }
        std::cout << std::endl;
    }
}
272
273 template <typename T> void printVectors(const std::vector<Date> &vec1, const std::vector<T> &vec2) {
274 size_t maxSize = std::max(vec1.size(), vec2.size());
275
276 // Print headers
277 std::cout << std::left << std::setw(30) << "Date1"
278 << "| Date2" << std::endl;
279 std::cout << "--------------------------------------------------" << std::endl;
280
281 for (size_t i = 0; i < maxSize; i++) {
282 // Print date from vec1 or "NaN" if out of range
283 if (i < vec1.size()) {
284 std::cout << std::left << std::setw(30) << vec1[i].toString(Date::ISO) << "| ";
285 } else {
286 std::cout << std::left << std::setw(30) << "NaN"
287 << "| ";
288 }
289
290 // Print date from vec2 or "NaN" if out of range
291 if (i < vec2.size()) {
292 std::cout << vec2[i] << std::endl;
293 } else {
294 std::cout << "NaN" << std::endl;
295 }
296 }
297 }
298
299 } // namespace ARIMAutils
300} // namespace mio
301#endif // UTILS_H
static const int MAX_ARIMA_EXTRAPOLATION
Definition: ARIMAutils.h:28
static const int MIN_ARIMA_DATA_POINTS
Definition: ARIMAutils.h:27
static const double DATE_TOLERANCE
Definition: ARIMAutils.h:26
Definition: ARIMAutils.h:55
Mode getMode()
Definition: ARIMAutils.h:67
void setMode(const Mode &new_mode)
Definition: ARIMAutils.h:66
Normalization()
Definition: ARIMAutils.cc:47
std::vector< double > normalize(const std::vector< double > &data)
Definition: ARIMAutils.cc:53
std::vector< double > denormalize(const std::vector< double > &data)
Definition: ARIMAutils.cc:80
Mode
Definition: ARIMAutils.h:57
@ ZScore
Definition: ARIMAutils.h:59
@ MinMax
Definition: ARIMAutils.h:58
@ Nothing
Definition: ARIMAutils.h:60
A class to handle timestamps. This class handles conversion between different time display formats (I...
Definition: Date.h:87
const std::string toString(const FORMATS &type, const bool &gmt=false) const
Return a nicely formatted string.
Definition: Date.cc:1128
@ ISO
ISO 8601 extended format combined date: YYYY-MM-DDTHH:mm:SS.sss (fields might be dropped,...
Definition: Date.h:91
size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size, const size_t &indexP1)
Definition: ARIMAutils.cc:243
double stdDev(const std::vector< double > &vec)
Definition: ARIMAutils.cc:144
const std::map< ObjectiveFunction, std::string > ObjectiveFunctionMap
Definition: ARIMAutils.cc:31
std::vector< double > arange(const size_t &start, const size_t &N)
Definition: ARIMAutils.cc:122
void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window, double &window_size, Date &data_start_date, Date &data_end_date)
Definition: ARIMAutils.cc:329
std::vector< double > decideDirection(const std::vector< double > &data, const std::string &direction, bool forward, size_t gap_loc, size_t length)
Definition: ARIMAutils.cc:177
std::vector< double > slice(const std::vector< double > &vec, const size_t &start, const size_t &N)
Definition: ARIMAutils.cc:106
static Date findFirstDateWithSamplingRate(const std::vector< MeteoData > &vecM, const double sampling_rate, const Date &data_start_date, const Date &data_end_date)
Definition: ARIMAutils.cc:397
double calcVecMean(const std::vector< double > &vec)
Definition: ARIMAutils.cc:131
size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size)
Definition: ARIMAutils.cc:195
std::vector< double > toVector(const std::vector< MeteoData > &vecM, const std::string &paramname)
Definition: ARIMAutils.cc:158
std::string convertVectorsToString(const std::vector< std::vector< T > > &vecs)
Definition: ARIMAutils.h:219
Date adjustStartDate(const std::vector< MeteoData > &vecM, const ARIMA_GAP &last_gap, Date data_start_date, const Date &data_end_date)
Definition: ARIMAutils.cc:415
OptimizationMethod
Definition: ARIMAutils.h:41
@ Conjugate_Gradient
Definition: ARIMAutils.h:46
@ LBFGS
Definition: ARIMAutils.h:48
@ Newton_Trust_Region_Hook_Step
Definition: ARIMAutils.h:44
@ Newton_Trust_Region_Double_Dog_Leg
Definition: ARIMAutils.h:45
@ BFGS_MTM
Definition: ARIMAutils.h:49
@ Nelder_Mead
Definition: ARIMAutils.h:42
@ Newton_Line_Search
Definition: ARIMAutils.h:43
@ BFGS
Definition: ARIMAutils.h:47
ObjectiveFunction
Definition: ARIMAutils.h:34
@ MLE
Definition: ARIMAutils.h:36
@ CSS_MLE
Definition: ARIMAutils.h:35
@ CSS
Definition: ARIMAutils.h:37
const std::map< OptimizationMethod, std::string > OptimizationMethodMap
Definition: ARIMAutils.cc:36
std::vector< T > reverseVectorReturn(const std::vector< T > &vec)
Definition: ARIMAutils.h:122
void reverseVector(std::vector< T > &vec)
Definition: ARIMAutils.h:107
double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector< MeteoData > vecM)
Definition: ARIMAutils.cc:381
T findMinMax(const std::vector< T > &vec, bool findMin)
Definition: ARIMAutils.h:88
void printVectors(const std::vector< std::vector< T > > &vecs)
Definition: ARIMAutils.h:247
double mostLikelyValue(const std::vector< double > &vec)
Definition: ARIMAutils.cc:366
bool requal(const Date &date1, const Date &date2)
Definition: ARIMAutils.cc:300
static const double e
Definition: Meteoconst.h:72
const size_t npos
npos is the out-of-range value
Definition: IOUtils.h:80
Definition: Config.cc:31
static double forward(double x, const std::vector< double > &params, EditingRegFill::RegressionType regtype)
Definition: DataEditingAlgorithms.cc:1172
Definition: ARIMAutils.h:150
ARIMA_GAP()
Definition: ARIMAutils.h:151
void setEnd(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:164
Date endDate
Definition: ARIMAutils.h:177
bool isGap()
Definition: ARIMAutils.h:179
void extend(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:152
std::string toString() const
Definition: ARIMAutils.h:182
void setStart(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:158
size_t end
Definition: ARIMAutils.h:176
Date startDate
Definition: ARIMAutils.h:177
void reset()
Definition: ARIMAutils.h:170
double sampling_rate
Definition: ARIMAutils.h:178
size_t start
Definition: ARIMAutils.h:176