MeteoIODoc 20240518.aefd3c94
ARIMAutils.h
Go to the documentation of this file.
1// SPDX-License-Identifier: LGPL-3.0-or-later
2/***********************************************************************************/
3/* Copyright 2013 WSL Institute for Snow and Avalanche Research SLF-DAVOS */
4/***********************************************************************************/
5/* This file is part of MeteoIO.
6 MeteoIO is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 MeteoIO is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with MeteoIO. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef UTILS_H
20#define UTILS_H
21
22#include <cassert>
23#include <meteoio/MeteoIO.h>
24#include <vector>
25
26static const double DATE_TOLERANCE = 1e-6;
27static const int MIN_ARIMA_DATA_POINTS = 8;
28static const int MAX_ARIMA_EXTRAPOLATION = 100;
29
30namespace mio {
31
32 namespace ARIMAutils {
33
38 };
48 };
49
50 // a struct to hold the coefficients for normalization and denormalization of a time series, cannot be used
51 // for multiple time series
53 public:
54 enum Mode {
58 };
60 Normalization(std::vector<double>& data, Mode new_mode);
61 Normalization(std::vector<double>& data);
62
63 void setMode(Mode new_mode) {mode = new_mode;}
64 Mode getMode() {return mode;}
65 std::vector<double> normalize(const std::vector<double>& data);
66 std::vector<double> denormalize(const std::vector<double>& data);
67
68 private:
69 double mean;
70 double std;
71 double min;
72 double max;
73 Mode mode = MinMax;
74 };
75
76 // slice a vector from start to start+N
77 std::vector<double> slice(const std::vector<double> &vec, size_t start, size_t N);
78
79 // slice a vector from start to end
80 std::vector<double> slice(const std::vector<double> &vec, size_t start);
81
82 // np.arange for c++
83 std::vector<double> arange(size_t start, size_t N);
84
85 template <typename T> T findMinMax(const std::vector<T> &vec, bool findMin) {
86 assert(!vec.empty()); // Ensure the vector is not empty
87
88 T extremeValue = vec[0];
89 for (const auto &value : vec) {
90 if (findMin ? value < extremeValue : value > extremeValue) {
91 extremeValue = value;
92 }
93 }
94 return extremeValue;
95 }
96
97 // calculate the of a vector
98 double calcVecMean(const std::vector<double> &vec);
99
100 // calculate the standard deviation of a vector
101 double stdDev(const std::vector<double> &vec);
102
103 // reverse a vector in place
104 template <typename T> void reverseVector(std::vector<T> &vec) {
105 if (vec.empty()) {
106 throw std::invalid_argument("Cannot reverse an empty vector");
107 }
108 int start = 0;
109 int end = int(vec.size()) - 1;
110
111 while (start < end) {
112 std::swap(vec[start], vec[end]);
113 start++;
114 end--;
115 }
116 }
117
118 // reverse a vector and return it
119 template <typename T> std::vector<T> reverseVectorReturn(const std::vector<T> &vec) {
120 if (vec.empty()) {
121 throw std::invalid_argument("Cannot reverse an empty vector");
122 }
123 std::vector<T> reversed_vec = vec;
124 int start = 0;
125 int end = int(reversed_vec.size()) - 1;
126
127 while (start < end) {
128 std::swap(reversed_vec[start], reversed_vec[end]);
129 start++;
130 end--;
131 }
132
133 return reversed_vec;
134 }
135
136 // converts a vector of MeteoData to a vector of doubles
137 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const std::string &paramname);
138
139 // converts a vector of MeteoData to a vector of doubles
140 std::vector<double> toVector(const std::vector<MeteoData> &vecM, const size_t &paramindex);
141
142 // helper to parse direction argument for interpolarima
143 std::vector<double> decideDirection(const std::vector<double> &data, const std::string &direction, bool forward, size_t gap_loc,
144 size_t length);
145
146 // a struct to cache information about a gap
147 struct ARIMA_GAP {
149 void extend(const size_t &idx, const std::vector<MeteoData> &vecM) {
150 if (idx < start)
151 setStart(idx, vecM);
152 if (idx > end)
153 setEnd(idx, vecM);
154 }
155 void setStart(const size_t &idx, const std::vector<MeteoData> &vecM) {
156 if (idx >= vecM.size())
157 return;
158 start = idx;
159 startDate = vecM[idx].date;
160 }
161 void setEnd(const size_t &idx, const std::vector<MeteoData> &vecM) {
162 if (idx >= vecM.size())
163 return;
164 end = idx;
165 endDate = vecM[idx].date;
166 }
167 void reset() {
170 startDate = Date();
171 endDate = Date();
172 }
173 size_t start, end;
176 bool isGap() {
177 return (endDate - startDate).getJulian(true) * sampling_rate >= 2;
178 } // TODO: should i always do arima prediction?
179 std::string toString() const {
180 std::ostringstream os;
181 os << "ARIMA_GAP: {\n"
182 << "\tStart Date: " << startDate.toString(Date::ISO) << ",\n"
183 << "\tEnd Date: " << endDate.toString(Date::ISO) << ",\n"
184 << "\tSampling Rate: " << sampling_rate << ",\n"
185 << "}";
186 return os.str();
187 }
188 };
189
190 // return true if a valid point could be found backward from pos
191 size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
192 const Date &resampling_date, const double &i_window_size);
193
194 // return true if a valid point could be found forward from pos
195 size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
196 const Date &resampling_date, const double &i_window_size, const size_t &indexP1);
197
198 void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector<MeteoData> &vecM,
199 const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window,
200 double &window_size, Date &data_start_date, Date &data_end_date);
201
202 // roughly equal between two dates, given a tolerance level
203 bool requal(const Date &date1, const Date &date2);
204
205 // returns the most often accuring value in a vector
206 double mostLikelyValue(const std::vector<double> &vec);
207
208 // compute the most often occuring sampling rate rounded to 1e-6
209 double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector<MeteoData> vecM);
210
211 Date findFirstDateWithSamplingRate(const std::vector<MeteoData> &vecM, const double sampling_rate, const Date &data_start_date,
212 Date &data_end_date);
213 Date adjustStartDate(const std::vector<MeteoData> &vecM, const ARIMA_GAP &last_gap, Date data_start_date,
214 const Date &data_end_date);
215
216 template <typename T> std::string convertVectorsToString(const std::vector<std::vector<T>> &vecs) {
217 std::ostringstream oss;
218 size_t maxSize = 0;
219 for (const auto &vec : vecs) {
220 maxSize = std::max(maxSize, vec.size());
221 }
222
223 // Print headers
224 for (size_t i = 0; i < vecs.size(); i++) {
225 oss << std::left << std::setw(10) << "Vector" + std::to_string(i + 1);
226 }
227 oss << std::endl;
228 oss << std::string(vecs.size() * 10, '-') << std::endl;
229
230 for (size_t i = 0; i < maxSize; i++) {
231 for (const auto &vec : vecs) {
232 // Print elements from vec or "NaN" if out of range
233 if (i < vec.size()) {
234 oss << std::left << std::setw(10) << vec[i];
235 } else {
236 oss << std::left << std::setw(10) << "NaN";
237 }
238 }
239 oss << std::endl;
240 }
241 return oss.str();
242 }
243
244 template <typename T> void printVectors(const std::vector<std::vector<T>> &vecs) {
245 size_t maxSize = 0;
246 for (const auto &vec : vecs) {
247 maxSize = std::max(maxSize, vec.size());
248 }
249
250 // Print headers
251 for (size_t i = 0; i < vecs.size(); i++) {
252 std::cout << std::left << std::setw(10) << "Vector" + std::to_string(i + 1);
253 }
254 std::cout << std::endl;
255 std::cout << std::string(vecs.size() * 10, '-') << std::endl;
256
257 for (size_t i = 0; i < maxSize; i++) {
258 for (const auto &vec : vecs) {
259 // Print elements from vec or "NaN" if out of range
260 if (i < vec.size()) {
261 std::cout << std::left << std::setw(10) << vec[i];
262 } else {
263 std::cout << std::left << std::setw(10) << "NaN";
264 }
265 }
266 std::cout << std::endl;
267 }
268 }
269
270 template <typename T> void printVectors(const std::vector<Date> &vec1, const std::vector<T> &vec2) {
271 size_t maxSize = std::max(vec1.size(), vec2.size());
272
273 // Print headers
274 std::cout << std::left << std::setw(30) << "Date1"
275 << "| Date2" << std::endl;
276 std::cout << "--------------------------------------------------" << std::endl;
277
278 for (size_t i = 0; i < maxSize; i++) {
279 // Print date from vec1 or "NaN" if out of range
280 if (i < vec1.size()) {
281 std::cout << std::left << std::setw(30) << vec1[i].toString(Date::ISO) << "| ";
282 } else {
283 std::cout << std::left << std::setw(30) << "NaN"
284 << "| ";
285 }
286
287 // Print date from vec2 or "NaN" if out of range
288 if (i < vec2.size()) {
289 std::cout << vec2[i] << std::endl;
290 } else {
291 std::cout << "NaN" << std::endl;
292 }
293 }
294 }
295
296 } // namespace ARIMAutils
297} // namespace mio
298#endif // UTILS_H
static const int MAX_ARIMA_EXTRAPOLATION
Definition: ARIMAutils.h:28
static const int MIN_ARIMA_DATA_POINTS
Definition: ARIMAutils.h:27
static const double DATE_TOLERANCE
Definition: ARIMAutils.h:26
Definition: ARIMAutils.h:52
Mode getMode()
Definition: ARIMAutils.h:64
Normalization()
Definition: ARIMAutils.cc:30
std::vector< double > normalize(const std::vector< double > &data)
Definition: ARIMAutils.cc:36
std::vector< double > denormalize(const std::vector< double > &data)
Definition: ARIMAutils.cc:50
Mode
Definition: ARIMAutils.h:54
@ ZScore
Definition: ARIMAutils.h:56
@ MinMax
Definition: ARIMAutils.h:55
@ Nothing
Definition: ARIMAutils.h:57
void setMode(Mode new_mode)
Definition: ARIMAutils.h:63
A class to handle timestamps. This class handles conversion between different time display formats (I...
Definition: Date.h:87
const std::string toString(const FORMATS &type, const bool &gmt=false) const
Return a nicely formated string.
Definition: Date.cc:1120
@ ISO
ISO 8601 extended format combined date: YYYY-MM-DDTHH:mm:SS.sss (fields might be dropped,...
Definition: Date.h:91
size_t searchForward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size, const size_t &indexP1)
Definition: ARIMAutils.cc:191
double stdDev(const std::vector< double > &vec)
Definition: ARIMAutils.cc:96
void computeARIMAGap(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, size_t &indexP1, size_t &indexP2, double &before_window, double &after_window, double &window_size, Date &data_start_date, Date &data_end_date)
Definition: ARIMAutils.cc:277
std::vector< double > decideDirection(const std::vector< double > &data, const std::string &direction, bool forward, size_t gap_loc, size_t length)
Definition: ARIMAutils.cc:125
static Date findFirstDateWithSamplingRate(const std::vector< MeteoData > &vecM, const double sampling_rate, const Date &data_start_date, const Date &data_end_date)
Definition: ARIMAutils.cc:332
double calcVecMean(const std::vector< double > &vec)
Definition: ARIMAutils.cc:91
std::vector< double > slice(const std::vector< double > &vec, size_t start, size_t N)
Definition: ARIMAutils.cc:66
size_t searchBackward(ARIMA_GAP &last_gap, const size_t &pos, const size_t &paramindex, const std::vector< MeteoData > &vecM, const Date &resampling_date, const double &i_window_size)
Definition: ARIMAutils.cc:143
std::vector< double > toVector(const std::vector< MeteoData > &vecM, const std::string &paramname)
Definition: ARIMAutils.cc:106
std::vector< double > arange(size_t start, size_t N)
Definition: ARIMAutils.cc:82
std::string convertVectorsToString(const std::vector< std::vector< T > > &vecs)
Definition: ARIMAutils.h:216
Date adjustStartDate(const std::vector< MeteoData > &vecM, const ARIMA_GAP &last_gap, Date data_start_date, const Date &data_end_date)
Definition: ARIMAutils.cc:349
OptimizationMethod
Definition: ARIMAutils.h:39
@ Conjugate_Gradient
Definition: ARIMAutils.h:44
@ LBFGS
Definition: ARIMAutils.h:46
@ Newton_Trust_Region_Hook_Step
Definition: ARIMAutils.h:42
@ Newton_Trust_Region_Double_Dog_Leg
Definition: ARIMAutils.h:43
@ BFGS_MTM
Definition: ARIMAutils.h:47
@ Nelder_Mead
Definition: ARIMAutils.h:40
@ Newton_Line_Search
Definition: ARIMAutils.h:41
@ BFGS
Definition: ARIMAutils.h:45
ObjectiveFunction
Definition: ARIMAutils.h:34
@ MLE
Definition: ARIMAutils.h:36
@ CSS_MLE
Definition: ARIMAutils.h:35
@ CSS
Definition: ARIMAutils.h:37
std::vector< T > reverseVectorReturn(const std::vector< T > &vec)
Definition: ARIMAutils.h:119
void reverseVector(std::vector< T > &vec)
Definition: ARIMAutils.h:104
double computeSamplingRate(Date data_start_date, Date data_end_date, std::vector< MeteoData > vecM)
Definition: ARIMAutils.cc:316
T findMinMax(const std::vector< T > &vec, bool findMin)
Definition: ARIMAutils.h:85
void printVectors(const std::vector< std::vector< T > > &vecs)
Definition: ARIMAutils.h:244
double mostLikelyValue(const std::vector< double > &vec)
Definition: ARIMAutils.cc:301
bool requal(const Date &date1, const Date &date2)
Definition: ARIMAutils.cc:248
static const double e
Definition: Meteoconst.h:68
const size_t npos
npos is the out-of-range value
Definition: IOUtils.h:80
Definition: Config.cc:31
static double forward(double x, const std::vector< double > &params, EditingRegFill::RegressionType regtype)
Definition: DataEditingAlgorithms.cc:1007
Definition: ARIMAutils.h:147
ARIMA_GAP()
Definition: ARIMAutils.h:148
void setEnd(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:161
Date endDate
Definition: ARIMAutils.h:174
bool isGap()
Definition: ARIMAutils.h:176
void extend(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:149
std::string toString() const
Definition: ARIMAutils.h:179
void setStart(const size_t &idx, const std::vector< MeteoData > &vecM)
Definition: ARIMAutils.h:155
size_t end
Definition: ARIMAutils.h:173
Date startDate
Definition: ARIMAutils.h:174
void reset()
Definition: ARIMAutils.h:167
double sampling_rate
Definition: ARIMAutils.h:175
size_t start
Definition: ARIMAutils.h:173