Lean  $LEAN_TAG$
AutoRegressiveIntegratedMovingAverage.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Collections.Generic;
18 using System.Linq;
19 using MathNet.Numerics;
20 using MathNet.Numerics.LinearAlgebra.Double;
21 using MathNet.Numerics.LinearRegression;
22 
24 {
25  /// <summary>
26  /// An Autoregressive Intergrated Moving Average (ARIMA) is a time series model which can be used to describe a set of data.
27  /// In particular,with Xₜ representing the series, the model assumes the data are of form
28  /// (after differencing <see cref="_diffOrder" /> times):
29  /// <para>
30  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
31  /// </para>
32  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
33  /// </summary>
35  {
36  private List<double> _residuals;
37  private readonly bool _intercept;
38  private readonly RollingWindow<double> _rollingData;
39 
40  /// <summary>
41  /// Differencing coefficient (d). Determines how many times the series should be differenced before fitting the
42  /// model.
43  /// </summary>
44  private readonly int _diffOrder;
45 
46  /// <summary>
47  /// AR coefficient -- p
48  /// </summary>
49  private readonly int _arOrder;
50 
51  /// <summary>
52  /// MA Coefficient -- q
53  /// </summary>
54  private readonly int _maOrder;
55 
56  /// <summary>
57  /// Fitted AR parameters (φ terms).
58  /// </summary>
59  public double[] ArParameters;
60 
61  /// <summary>
62  /// Fitted MA parameters (θ terms).
63  /// </summary>
64  public double[] MaParameters;
65 
66  /// <summary>
67  /// Fitted intercept (c term).
68  /// </summary>
69  public double Intercept;
70 
71  /// <summary>
72  /// Gets a flag indicating when this indicator is ready and fully initialized
73  /// </summary>
74  public override bool IsReady => _rollingData.IsReady;
75 
76  /// <summary>
77  /// Required period, in data points, for the indicator to be ready and fully initialized.
78  /// </summary>
79  public override int WarmUpPeriod { get; }
80 
81  /// <summary>
82  /// The variance of the residuals (Var(ε)) from the first step of <see cref="TwoStepFit" />.
83  /// </summary>
84  public double ArResidualError { get; private set; }
85 
86  /// <summary>
87  /// The variance of the residuals (Var(ε)) from the second step of <see cref="TwoStepFit" />.
88  /// </summary>
89  public double MaResidualError { get; private set; }
90 
91  /// <summary>
92  /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it <see cref="_diffOrder" /> times):
93  /// <para>
94  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
95  /// </para>
96  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
97  /// This particular constructor fits the model by means of <see cref="TwoStepFit" /> for a specified name.
98  /// </summary>
99  /// <param name="name">The name of the indicator</param>
100  /// <param name="arOrder">AR order (p) -- defines the number of past values to consider in the AR component of the model.</param>
101  /// <param name="diffOrder">Difference order (d) -- defines how many times to difference the model before fitting parameters.</param>
102  /// <param name="maOrder">MA order -- defines the number of past values to consider in the MA component of the model.</param>
103  /// <param name="period">Size of the rolling series to fit onto</param>
104  /// <param name="intercept">Whether ot not to include the intercept term</param>
106  string name,
107  int arOrder,
108  int diffOrder,
109  int maOrder,
110  int period,
111  bool intercept = true
112  )
113  : base(name)
114  {
115  if (arOrder < 0 || maOrder < 0)
116  {
117  throw new ArgumentException("AR/MA orders cannot be negative.");
118  }
119 
120  if (arOrder == 0)
121  {
122  throw new ArgumentException("arOrder (p) must be greater than zero for all " +
123  "currently available fitting methods.");
124  }
125 
126  if (period < Math.Max(arOrder, maOrder))
127  {
128  throw new ArgumentException("Period must exceed both arOrder and maOrder");
129  }
130 
131  _arOrder = arOrder;
132  _maOrder = maOrder;
133  _diffOrder = diffOrder;
134  WarmUpPeriod = period;
135  _rollingData = new RollingWindow<double>(period);
136  _intercept = intercept;
137  }
138 
139  /// <summary>
140  /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it <see cref="_diffOrder" /> times):
141  /// <para>
142  /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
143  /// </para>
144  /// where the first sum has an upper limit of <see cref="_arOrder" /> and the second <see cref="_maOrder" />.
145  /// This particular constructor fits the model by means of <see cref="TwoStepFit" /> using ordinary least squares.
146  /// </summary>
147  /// <param name="arOrder">AR order (p) -- defines the number of past values to consider in the AR component of the model.</param>
148  /// <param name="diffOrder">Difference order (d) -- defines how many times to difference the model before fitting parameters.</param>
149  /// <param name="maOrder">MA order (q) -- defines the number of past values to consider in the MA component of the model.</param>
150  /// <param name="period">Size of the rolling series to fit onto</param>
151  /// <param name="intercept">Whether to include an intercept term (c)</param>
153  int arOrder,
154  int diffOrder,
155  int maOrder,
156  int period,
157  bool intercept
158  )
159  : this($"ARIMA(({arOrder}, {diffOrder}, {maOrder}), {period}, {intercept})", arOrder, diffOrder, maOrder,
160  period, intercept)
161  {
162  }
163 
164  /// <summary>
165  /// Resets this indicator to its initial state
166  /// </summary>
167  public override void Reset()
168  {
169  base.Reset();
170  _rollingData.Reset();
171  }
172 
173  /// <summary>
174  /// Forecasts the series of the fitted model one point ahead.
175  /// </summary>
176  /// <param name="input">The input given to the indicator</param>
177  /// <returns>A new value for this indicator</returns>
178  protected override decimal ComputeNextValue(IndicatorDataPoint input)
179  {
180  _rollingData.Add((double)input.Value);
181  if (_rollingData.IsReady)
182  {
183  var arrayData = _rollingData.ToArray();
184  arrayData = _diffOrder > 0 ? DifferenceSeries(_diffOrder, arrayData, out _diffHeads) : arrayData;
185  TwoStepFit(arrayData);
186  double summants = 0;
187  if (_arOrder > 0)
188  {
189  for (var i = 0; i < _arOrder; i++) // AR Parameters
190  {
191  summants += ArParameters[i] * arrayData[i];
192  }
193  }
194 
195  if (_maOrder > 0)
196  {
197  for (var i = 0; i < _maOrder; i++) // MA Parameters
198  {
199  summants += MaParameters[i] * _residuals[_maOrder + i + 1];
200  }
201  }
202 
203  summants += Intercept; // By default equals 0
204 
205  if (_diffOrder > 0)
206  {
207  var dataCast = arrayData.ToList();
208  dataCast.Insert(0, summants); // Prepends
209  summants = InverseDifferencedSeries(dataCast.ToArray(), _diffHeads).First(); // Returns disintegrated series
210  }
211 
212  return (decimal)summants;
213  }
214 
215  return 0m;
216  }
217 
218  /// <summary>
219  /// Fits the model by means of implementing the following pseudo-code algorithm (in the form of "if{then}"):
220  /// <code>
221  /// if diffOrder > 0 {Difference data diffOrder times}
222  /// if arOrder > 0 {Fit the AR model Xₜ = ΣᵢφᵢXₜ; ε's are set to residuals from fitting this.}
223  /// if maOrder > 0 {Fit the MA parameters left over Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ}
224  /// Return: φ and θ estimates.
225  /// </code>
226  /// http://mbhauser.com/informal-notes/two-step-arma-estimation.pdf
227  /// </summary>
228  private void TwoStepFit(double[] series) // Protected for any future inheritors (e.g., SARIMA)
229  {
230  _residuals = new List<double>();
231  double errorAr = 0;
232  double errorMa = 0;
233  var lags = _arOrder > 0 ? LaggedSeries(_arOrder, series) : new[] {series};
234 
235  AutoRegressiveStep(lags, series, errorAr);
236 
237  if (_maOrder <= 0)
238  {
239  return;
240  }
241 
242  MovingAverageStep(lags, series, errorMa);
243  }
244 
245  /// <summary>
246  /// Fits the moving average component in the <see cref="TwoStepFit"/> method.
247  /// </summary>
248  /// <param name="lags">An array of lagged data (<see cref="TimeSeriesIndicator.LaggedSeries"/>).</param>
249  /// <param name="data">The input series, differenced <see cref="_diffOrder"/> times.</param>
250  /// <param name="errorMa">The summed residuals (by default 0) associated with the MA component.</param>
251  private void MovingAverageStep(double[][] lags, double[] data, double errorMa)
252  {
253  var appendedData = new List<double[]>();
254  var laggedErrors = LaggedSeries(_maOrder, _residuals.ToArray());
255  for (var i = 0; i < laggedErrors.Length; i++)
256  {
257  var doubles = lags[i].ToList();
258  doubles.AddRange(laggedErrors[i]);
259  appendedData.Add(doubles.ToArray());
260  }
261 
262  var maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(),
263  method: DirectRegressionMethod.NormalEquations, intercept: _intercept);
264  for (var i = _maOrder; i < data.Length; i++) // Calculate the error assoc. with model.
265  {
266  var paramVector = _intercept
267  ? Vector.Build.Dense(maFits.Skip(1).ToArray())
268  : Vector.Build.Dense(maFits);
269  var residual = data[i] - Vector.Build.Dense(appendedData[i - _maOrder]).DotProduct(paramVector);
270  errorMa += Math.Pow(residual, 2);
271  }
272 
273  switch (_intercept)
274  {
275  case true:
276  MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
277  MaParameters = maFits.Skip(1 + _arOrder).ToArray();
278  ArParameters = maFits.Skip(1).Take(_arOrder).ToArray();
279  Intercept = maFits[0];
280  break;
281  default:
282  MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
283  MaParameters = maFits.Skip(_arOrder).ToArray();
284  ArParameters = maFits.Take(_arOrder).ToArray();
285  break;
286  }
287  }
288 
289  /// <summary>
290  /// Fits the autoregressive component in the <see cref="TwoStepFit"/> method.
291  /// </summary>
292  /// <param name="lags">An array of lagged data (<see cref="TimeSeriesIndicator.LaggedSeries"/>).</param>
293  /// <param name="data">The input series, differenced <see cref="_diffOrder"/> times.</param>
294  /// <param name="errorAr">The summed residuals (by default 0) associated with the AR component.</param>
295  private void AutoRegressiveStep(double[][] lags, double[] data, double errorAr)
296  {
297  double[] arFits;
298  // The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ
299  arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(),
300  method: DirectRegressionMethod.NormalEquations);
301  var fittedVec = Vector.Build.Dense(arFits);
302 
303  for (var i = 0; i < data.Length; i++) // Calculate the error assoc. with model.
304  {
305  if (i < _arOrder)
306  {
307  _residuals.Add(0); // 0-padding
308  continue;
309  }
310 
311  var residual = data[i] - Vector.Build.Dense(lags[i - _arOrder]).DotProduct(fittedVec);
312  errorAr += Math.Pow(residual, 2);
313  _residuals.Add(residual);
314  }
315 
316  ArResidualError = errorAr / (data.Length - _arOrder - 1);
317  if (_maOrder == 0)
318  {
319  ArParameters = arFits; // Will not be thrown out
320  }
321  }
322  }
323 }