Lean  $LEAN_TAG$
AutoRegressiveIntegratedMovingAverage.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Collections.Generic;
18 using System.Linq;
19 using MathNet.Numerics;
20 using MathNet.Numerics.LinearAlgebra.Double;
21 using MathNet.Numerics.LinearRegression;
22 
24 {
25  /// <summary>
26  /// An Autoregressive Intergrated Moving Average (ARIMA) is a time series model which can be used to describe a set of data.
27  /// In particular,with Xₜ representing the series, the model assumes the data are of form
28  /// (after differencing <see cref="_diffOrder" /> times):
29  /// <para>
30  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
31  /// </para>
32  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
33  /// </summary>
35  {
36  private List<double> _residuals;
37  private readonly bool _intercept;
38  private bool _loggedOnceInMovingAverageStep;
39  private bool _loggedOnceInAutoRegressiveStep;
40  private readonly RollingWindow<double> _rollingData;
41 
42  /// <summary>
43  /// Differencing coefficient (d). Determines how many times the series should be differenced before fitting the
44  /// model.
45  /// </summary>
46  private readonly int _diffOrder;
47 
48  /// <summary>
49  /// AR coefficient -- p
50  /// </summary>
51  private readonly int _arOrder;
52 
53  /// <summary>
54  /// MA Coefficient -- q
55  /// </summary>
56  private readonly int _maOrder;
57 
58  /// <summary>
59  /// Whether or not to handle potential exceptions, returning a zero value. I.e, the values
60  /// provided as input are not valid by the Normal Equations direct regression method
61  /// </summary>
62  public bool HandleExceptions { get; set; } = true;
63 
64  /// <summary>
65  /// Fitted AR parameters (φ terms).
66  /// </summary>
67  public double[] ArParameters { get; private set; }
68 
69  /// <summary>
70  /// Fitted MA parameters (θ terms).
71  /// </summary>
72  public double[] MaParameters { get; private set; }
73 
74  /// <summary>
75  /// Fitted intercept (c term).
76  /// </summary>
77  public double Intercept { get; private set; }
78 
79  /// <summary>
80  /// Gets a flag indicating when this indicator is ready and fully initialized
81  /// </summary>
82  public override bool IsReady => _rollingData.IsReady;
83 
84  /// <summary>
85  /// Required period, in data points, for the indicator to be ready and fully initialized.
86  /// </summary>
87  public override int WarmUpPeriod { get; }
88 
89  /// <summary>
90  /// The variance of the residuals (Var(ε)) from the first step of <see cref="TwoStepFit" />.
91  /// </summary>
92  public double ArResidualError { get; private set; }
93 
94  /// <summary>
95  /// The variance of the residuals (Var(ε)) from the second step of <see cref="TwoStepFit" />.
96  /// </summary>
97  public double MaResidualError { get; private set; }
98 
99  /// <summary>
100  /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it <see cref="_diffOrder" /> times):
101  /// <para>
102  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
103  /// </para>
104  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
105  /// This particular constructor fits the model by means of <see cref="TwoStepFit" /> for a specified name.
106  /// </summary>
107  /// <param name="name">The name of the indicator</param>
108  /// <param name="arOrder">AR order (p) -- defines the number of past values to consider in the AR component of the model.</param>
109  /// <param name="diffOrder">Difference order (d) -- defines how many times to difference the model before fitting parameters.</param>
110  /// <param name="maOrder">MA order -- defines the number of past values to consider in the MA component of the model.</param>
111  /// <param name="period">Size of the rolling series to fit onto</param>
112  /// <param name="intercept">Whether or not to include the intercept term</param>
114  string name,
115  int arOrder,
116  int diffOrder,
117  int maOrder,
118  int period,
119  bool intercept = true
120  )
121  : base(name)
122  {
123  if (arOrder < 0 || maOrder < 0)
124  {
125  throw new ArgumentException("AR/MA orders cannot be negative.");
126  }
127 
128  if (arOrder == 0)
129  {
130  throw new ArgumentException("arOrder (p) must be greater than zero for all " +
131  "currently available fitting methods.");
132  }
133 
134  if (period < Math.Max(arOrder, maOrder))
135  {
136  throw new ArgumentException("Period must exceed both arOrder and maOrder");
137  }
138 
139  _arOrder = arOrder;
140  _maOrder = maOrder;
141  _diffOrder = diffOrder;
142  WarmUpPeriod = period;
143  _rollingData = new RollingWindow<double>(period);
144  _intercept = intercept;
145  }
146 
147  /// <summary>
148  /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it <see cref="_diffOrder" /> times):
149  /// <para>
150  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
151  /// </para>
152  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
153  /// This particular constructor fits the model by means of <see cref="TwoStepFit" /> using ordinary least squares.
154  /// </summary>
155  /// <param name="arOrder">AR order (p) -- defines the number of past values to consider in the AR component of the model.</param>
156  /// <param name="diffOrder">Difference order (d) -- defines how many times to difference the model before fitting parameters.</param>
157  /// <param name="maOrder">MA order (q) -- defines the number of past values to consider in the MA component of the model.</param>
158  /// <param name="period">Size of the rolling series to fit onto</param>
159  /// <param name="intercept">Whether to include an intercept term (c)</param>
161  int arOrder,
162  int diffOrder,
163  int maOrder,
164  int period,
165  bool intercept
166  )
167  : this($"ARIMA(({arOrder}, {diffOrder}, {maOrder}), {period}, {intercept})", arOrder, diffOrder, maOrder,
168  period, intercept)
169  {
170  }
171 
172  /// <summary>
173  /// Resets this indicator to its initial state
174  /// </summary>
175  public override void Reset()
176  {
177  base.Reset();
178  _rollingData.Reset();
179  }
180 
181  /// <summary>
182  /// Forecasts the series of the fitted model one point ahead.
183  /// </summary>
184  /// <param name="input">The input given to the indicator</param>
185  /// <returns>A new value for this indicator</returns>
186  protected override decimal ComputeNextValue(IndicatorDataPoint input)
187  {
188  _rollingData.Add((double)input.Value);
189  if (_rollingData.IsReady)
190  {
191  var arrayData = _rollingData.ToArray();
192  double[] diffHeads = default;
193  arrayData = _diffOrder > 0 ? DifferenceSeries(_diffOrder, arrayData, out diffHeads) : arrayData;
194  _diffHeads = diffHeads;
195  TwoStepFit(arrayData);
196  double summants = 0;
197  if (_arOrder > 0)
198  {
199  for (var i = 0; i < _arOrder; i++) // AR Parameters
200  {
201  summants += ArParameters[i] * arrayData[i];
202  }
203  }
204 
205  if (_maOrder > 0)
206  {
207  for (var i = 0; i < _maOrder; i++) // MA Parameters
208  {
209  summants += MaParameters[i] * _residuals[_maOrder + i + 1];
210  }
211  }
212 
213  summants += Intercept; // By default equals 0
214 
215  if (_diffOrder > 0)
216  {
217  var dataCast = arrayData.ToList();
218  dataCast.Insert(0, summants); // Prepends
219  summants = InverseDifferencedSeries(dataCast.ToArray(), _diffHeads).First(); // Returns disintegrated series
220  }
221 
222  return (decimal)summants;
223  }
224 
225  return 0m;
226  }
227 
228  /// <summary>
229  /// Fits the model by means of implementing the following pseudo-code algorithm (in the form of "if{then}"):
230  /// <code>
231  /// if diffOrder > 0 {Difference data diffOrder times}
232  /// if arOrder > 0 {Fit the AR model Xₜ = ΣᵢφᵢXₜ; ε's are set to residuals from fitting this.}
233  /// if maOrder > 0 {Fit the MA parameters left over Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ}
234  /// Return: φ and θ estimates.
235  /// </code>
236  /// http://mbhauser.com/informal-notes/two-step-arma-estimation.pdf
237  /// </summary>
238  private void TwoStepFit(double[] series) // Protected for any future inheritors (e.g., SARIMA)
239  {
240  _residuals = new List<double>();
241  double errorAr = 0;
242  double errorMa = 0;
243  var lags = _arOrder > 0 ? LaggedSeries(_arOrder, series) : new[] {series};
244 
245  AutoRegressiveStep(lags, series, errorAr);
246 
247  if (_maOrder <= 0)
248  {
249  return;
250  }
251 
252  MovingAverageStep(lags, series, errorMa);
253  }
254 
255  /// <summary>
256  /// Fits the moving average component in the <see cref="TwoStepFit"/> method.
257  /// </summary>
258  /// <param name="lags">An array of lagged data (<see cref="TimeSeriesIndicator.LaggedSeries"/>).</param>
259  /// <param name="data">The input series, differenced <see cref="_diffOrder"/> times.</param>
260  /// <param name="errorMa">The summed residuals (by default 0) associated with the MA component.</param>
261  private void MovingAverageStep(double[][] lags, double[] data, double errorMa)
262  {
263  var appendedData = new List<double[]>();
264  var laggedErrors = LaggedSeries(_maOrder, _residuals.ToArray());
265  for (var i = 0; i < laggedErrors.Length; i++)
266  {
267  var doubles = lags[i].ToList();
268  doubles.AddRange(laggedErrors[i]);
269  appendedData.Add(doubles.ToArray());
270  }
271 
272  double[] maFits = default;
273  if (HandleExceptions)
274  {
275  try
276  {
277  maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(),
278  method: DirectRegressionMethod.NormalEquations, intercept: _intercept);
279  }
280  catch (Exception ex)
281  {
282  if (!_loggedOnceInMovingAverageStep)
283  {
284  Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.MovingAverageStep(): {ex.Message}");
285  _loggedOnceInMovingAverageStep = true;
286  }
287 
288  // The method Fit.MultiDim takes the appendedData array of mxn(m rows, n columns), computes its
289  // transpose of size nxm, and then multiplies the tranpose with the original matrix, so the
290  // resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the
291  // aforementioned matrix and b is the data. Thus, the size of the response x is n
292  //
293  // It's worth saying that if intercept flag is set to true, the number of columns of the initial
294  // matrix (appendedData) is increased in one. For more information, please see the implementation
295  // of Fit.MultiDim() method (Ctrl + right click)
296  var size = appendedData.ToArray()[0].Length + (_intercept ? 1 : 0);
297  maFits = new double[size];
298  }
299  }
300  else
301  {
302  maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(),
303  method: DirectRegressionMethod.NormalEquations, intercept: _intercept);
304  }
305 
306  for (var i = _maOrder; i < data.Length; i++) // Calculate the error assoc. with model.
307  {
308  var paramVector = _intercept
309  ? Vector.Build.Dense(maFits.Skip(1).ToArray())
310  : Vector.Build.Dense(maFits);
311  var residual = data[i] - Vector.Build.Dense(appendedData[i - _maOrder]).DotProduct(paramVector);
312  errorMa += Math.Pow(residual, 2);
313  }
314 
315  switch (_intercept)
316  {
317  case true:
318  MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
319  MaParameters = maFits.Skip(1 + _arOrder).ToArray();
320  ArParameters = maFits.Skip(1).Take(_arOrder).ToArray();
321  Intercept = maFits[0];
322  break;
323  default:
324  MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
325  MaParameters = maFits.Skip(_arOrder).ToArray();
326  ArParameters = maFits.Take(_arOrder).ToArray();
327  break;
328  }
329  }
330 
331  /// <summary>
332  /// Fits the autoregressive component in the <see cref="TwoStepFit"/> method.
333  /// </summary>
334  /// <param name="lags">An array of lagged data (<see cref="TimeSeriesIndicator.LaggedSeries"/>).</param>
335  /// <param name="data">The input series, differenced <see cref="_diffOrder"/> times.</param>
336  /// <param name="errorAr">The summed residuals (by default 0) associated with the AR component.</param>
337  private void AutoRegressiveStep(double[][] lags, double[] data, double errorAr)
338  {
339  double[] arFits;
340  if (HandleExceptions)
341  {
342  try
343  {
344  // The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ
345  arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(),
346  method: DirectRegressionMethod.NormalEquations);
347  }
348  catch (Exception ex)
349  {
350  if (!_loggedOnceInAutoRegressiveStep)
351  {
352  Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.AutoRegressiveStep(): {ex.Message}");
353  _loggedOnceInAutoRegressiveStep = true;
354  }
355 
356  // The method Fit.MultiDim takes the lags array of mxn(m rows, n columns), computes its
357  // transpose of size nxm, and then multiplies the tranpose with the original matrix, so the
358  // resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the
359  // aforementioned matrix and b is the data. Thus, the size of the response x is n
360  //
361  // For more information, please see the implementation of Fit.MultiDim() method (Ctrl + right click)
362  var size = lags.ToArray()[0].Length;
363  arFits = new double[size];
364  }
365  }
366  else
367  {
368  // The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ
369  arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(),
370  method: DirectRegressionMethod.NormalEquations);
371  }
372 
373  var fittedVec = Vector.Build.Dense(arFits);
374 
375  for (var i = 0; i < data.Length; i++) // Calculate the error assoc. with model.
376  {
377  if (i < _arOrder)
378  {
379  _residuals.Add(0); // 0-padding
380  continue;
381  }
382 
383  var residual = data[i] - Vector.Build.Dense(lags[i - _arOrder]).DotProduct(fittedVec);
384  errorAr += Math.Pow(residual, 2);
385  _residuals.Add(residual);
386  }
387 
388  ArResidualError = errorAr / (data.Length - _arOrder - 1);
389  if (_maOrder == 0)
390  {
391  ArParameters = arFits; // Will not be thrown out
392  }
393  }
394  }
395 }