Lean  $LEAN_TAG$
SeriesSampler.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Linq;
18 using QuantConnect.Util;
19 using System.Collections.Generic;
20 using System.Runtime.CompilerServices;
21 
22 namespace QuantConnect
23 {
24  /// <summary>
25  /// A type capable of taking a chart and resampling using a linear interpolation strategy
26  /// </summary>
27  public class SeriesSampler
28  {
/// <summary>
/// The desired sampling resolution. This is the increment added to the next sample time
/// each time a sampled point is produced.
/// </summary>
protected TimeSpan Step { get; set; }
33 
/// <summary>
/// True (the default) to emit a sampled point at every <see cref="Step"/>, even when that is
/// finer than the spacing of the source data. When false, a series whose first two points are
/// at least <see cref="Step"/> apart is returned without resampling, and the sampling loops
/// keep a sampled point only when the following sample time lies beyond the current source point.
/// </summary>
public bool SubSample { get; set; } = true;
38 
/// <summary>
/// Initializes a new instance of the <see cref="SeriesSampler"/> class that samples
/// series data at the given resolution
/// </summary>
/// <param name="resolution">The time step between consecutive sampled points</param>
/// <remarks>
/// NOTE(review): the resolution is stored as-is without validation. The sampling loops advance
/// by adding <see cref="Step"/> to the next sample time, so a zero or negative value would not
/// move them forward - confirm callers always pass a positive resolution.
/// </remarks>
public SeriesSampler(TimeSpan resolution) => Step = resolution;
47 
/// <summary>
/// Resamples a single series at the configured <see cref="Step"/>, dispatching on the
/// concrete series type
/// </summary>
/// <param name="series">The series to be sampled</param>
/// <param name="start">The date to start sampling, if before start of data then start of data will be used</param>
/// <param name="stop">The date to stop sampling, if after stop of data, then stop of data will be used</param>
/// <param name="truncateValues">True will truncate values to integers</param>
/// <returns>The sampled series</returns>
/// <exception cref="ArgumentException">
/// Thrown when the series is neither a <see cref="Series"/> nor a <see cref="CandlestickSeries"/>
/// </exception>
public virtual BaseSeries Sample(BaseSeries series, DateTime start, DateTime stop, bool truncateValues = false)
{
    if (!SubSample && series.Values.Count > 1)
    {
        // only the gap between the first two points is inspected to judge the data spacing
        var firstGap = series.Values[1].Time - series.Values[0].Time;
        if (firstGap >= Step)
        {
            // the data is already at least as coarse as the requested step: return it as given,
            // filtered to the [start, stop] range.
            // NOTE(review): truncation is explicitly disabled on this path regardless of the
            // 'truncateValues' argument - confirm this is intended.
            var untouched = series.Clone(empty: true);
            return GetIdentitySeries(untouched, series, start, stop, truncateValues: false);
        }
    }

    switch (series)
    {
        case Series plainSeries:
            return SampleSeries(plainSeries, start, stop, truncateValues);

        case CandlestickSeries candlestickSeries:
            return SampleCandlestickSeries(candlestickSeries, start, stop, truncateValues);

        default:
            throw new ArgumentException($"SeriesSampler.Sample(): Sampling only supports {typeof(Series)} and {typeof(CandlestickSeries)}");
    }
}
80 
/// <summary>
/// Resamples every chart in the given collection
/// </summary>
/// <param name="charts">The charts to be sampled</param>
/// <param name="start">The date to start sampling</param>
/// <param name="stop">The date to stop sampling</param>
/// <returns>A new dictionary holding the sampled charts, keyed by each chart's name</returns>
public Dictionary<string, Chart> SampleCharts(IDictionary<string, Chart> charts, DateTime start, DateTime stop)
{
    var result = new Dictionary<string, Chart>();

    foreach (var source in charts.Values)
    {
        // keyed by the chart's own name (not the input key); a repeated name overwrites the earlier entry
        var resampled = SampleChart(source, start, stop);
        result[source.Name] = resampled;
    }

    return result;
}
97 
/// <summary>
/// Resamples every series of the given chart into an empty clone of that chart
/// </summary>
/// <param name="chart">The chart to be sampled</param>
/// <param name="start">The date to start sampling</param>
/// <param name="stop">The date to stop sampling</param>
/// <returns>The sampled chart</returns>
public Chart SampleChart(Chart chart, DateTime start, DateTime stop)
{
    // start from an empty clone of the chart and refill it with resampled series
    var result = chart.CloneEmpty();

    foreach (var original in chart.Series.Values)
    {
        // values are never truncated here: Sample is called with its default for 'truncateValues'
        result.AddSeries(Sample(original, start, stop));
    }

    return result;
}
115 
/// <summary>
/// Samples the given series of chart points at every <see cref="Step"/>, interpolating
/// between the surrounding source points
/// </summary>
/// <param name="series">The series to be sampled</param>
/// <param name="start">The date to start sampling, if before start of data then start of data will be used</param>
/// <param name="stop">The date to stop sampling, if after stop of data, then stop of data will be used</param>
/// <param name="truncateValues">True will truncate values to integers</param>
/// <returns>The sampled series</returns>
private Series SampleSeries(Series series, DateTime start, DateTime stop, bool truncateValues)
{
    var sampled = (Series)series.Clone(empty: true);

    // we can't sample a single point and it doesn't make sense to sample scatter plots
    // (stacked areas are treated the same way): in these cases just copy the raw data
    if (series.Values.Count < 2 || series.SeriesType == SeriesType.Scatter || series.SeriesType == SeriesType.StackedArea)
    {
        return GetIdentitySeries(sampled, series, start, stop, truncateValues);
    }

    var nextSampleTime = start;

    var enumerator = series.Values.Cast<ChartPoint>().GetEnumerator();
    try
    {
        // initialize current/previous, the count check above guarantees both exist
        enumerator.MoveNext();
        var previous = enumerator.Current;
        enumerator.MoveNext();
        var current = enumerator.Current;

        // make sure we don't start sampling before the data begins
        if (nextSampleTime < previous.Time)
        {
            nextSampleTime = previous.Time;
        }

        // make sure to advance into the requested time frame before sampling
        while (current.Time < nextSampleTime && enumerator.MoveNext())
        {
            previous = current;
            current = enumerator.Current;
        }

        do
        {
            // emit samples until we pass the current source point (or the stop time)
            while (nextSampleTime <= current.Time && nextSampleTime <= stop)
            {
                ISeriesPoint sampledPoint;
                if (series.SeriesType == SeriesType.Treemap)
                {
                    // just carry along the values: use the current value once the following
                    // sample would land beyond the current point, otherwise the previous one
                    sampledPoint = new ChartPoint(nextSampleTime, (nextSampleTime + Step) > current.Time ? current.Y : previous.Y);
                }
                else
                {
                    // Interpolate always hands back a fresh point, so truncating in place is safe
                    sampledPoint = TruncateValue(Interpolate(previous, current, nextSampleTime, (decimal)Step.TotalSeconds), truncateValues, clone: false);
                }

                nextSampleTime += Step;

                // when not sub sampling keep only the sample whose successor moves past the current source point
                if (SubSample || current.Time < nextSampleTime)
                {
                    sampled.Values.Add(sampledPoint);
                }
            }

            // advance our current/previous
            if (nextSampleTime > current.Time)
            {
                if (!enumerator.MoveNext())
                {
                    // no more source data
                    break;
                }

                previous = current;
                current = enumerator.Current;
            }
        }
        // if we've passed our stop then we're finished sampling
        while (nextSampleTime <= stop);

        return sampled;
    }
    finally
    {
        // release the enumerator on every exit path, including exceptions thrown while sampling
        enumerator.DisposeSafely();
    }
}
208 
/// <summary>
/// Samples the given candlestick series at every <see cref="Step"/>. Source candlesticks that fall
/// before the next sample time are aggregated into a single bar, and a source candlestick that spans
/// several sample times yields several interpolated bars.
/// </summary>
/// <param name="series">The series to be sampled</param>
/// <param name="start">The date to start sampling, if before start of data then start of data will be used</param>
/// <param name="stop">The date to stop sampling, if after stop of data, then stop of data will be used</param>
/// <param name="truncateValues">True will truncate values to integers</param>
/// <returns>The sampled series</returns>
private CandlestickSeries SampleCandlestickSeries(CandlestickSeries series, DateTime start, DateTime stop, bool truncateValues)
{
    var sampledSeries = (CandlestickSeries)series.Clone(empty: true);

    var candlesticks = series.Values;
    var seriesSize = candlesticks.Count;

    // we can't sample a single point, so just copy the raw data
    if (seriesSize < 2)
    {
        return GetIdentitySeries(sampledSeries, series, start, stop, truncateValues);
    }

    // Make sure we don't start sampling before the data begins.
    var nextSampleTime = start;
    if (start < candlesticks[0].Time)
    {
        nextSampleTime = candlesticks[0].Time;
    }

    // Find the first candlestick that is after the start time and step back one position.
    // This variable will also be used to keep track of the first candlestick to be aggregated.
    var startIndex = candlesticks.FindIndex(x => x.Time > nextSampleTime) - 1;
    if (startIndex < 0)
    {
        // negative when FindIndex found nothing later than the sample time (-1) or when the very
        // first candlestick is already later (0): there is nothing to sample from, return identity
        return GetIdentitySeries(sampledSeries, series, start, stop, truncateValues);
    }
    if (candlesticks[startIndex].Time == nextSampleTime && nextSampleTime <= stop)
    {
        // the candlestick sits exactly on the first sample time: emit a copy of it as is
        // NOTE(review): this copy is not passed through TruncateValue, even when truncateValues is true
        sampledSeries.Values.Add(candlesticks[startIndex].Clone());
        nextSampleTime += Step;
        startIndex++;
    }

    // Walk every remaining candlestick, including the last one, for which 'next' stays null.
    for (var i = startIndex; i < seriesSize && nextSampleTime <= stop; i++)
    {
        var current = (Candlestick)candlesticks[i];
        Candlestick next = null;
        if (i + 1 < candlesticks.Count)
        {
            next = (Candlestick)candlesticks[i + 1];
        }
        if (nextSampleTime > current.Time)
        {
            // these bars will be aggregated
            continue;
        }

        // Form the bar(s) between candlesticks at startIndex and i
        // 'aggregated' is true when more than one source candlestick feeds this sample
        var aggregated = startIndex != i;
        var sampledCandlestick = AggregateCandlesticks(candlesticks, startIndex, i + 1, nextSampleTime, truncateValues);

        var first = (Candlestick)candlesticks[startIndex];
        // x origin used for interpolation: the time of the candlestick preceding the first aggregated one,
        // or, when there is none, the first time moved back by the gap between the first two candlesticks
        var firstOpenTime = startIndex > 0
            ? candlesticks[startIndex - 1].Time
            : first.Time - (candlesticks[startIndex + 1].Time - candlesticks[startIndex].Time);
        Candlestick previous = null;
        var isNull = false;
        do
        {
            var interpolated = Interpolate(sampledCandlestick, first, current, firstOpenTime, nextSampleTime, (decimal)Step.TotalSeconds);
            nextSampleTime += Step;

            if (SubSample)
            {
                if (previous != null)
                {
                    // chain consecutive interpolated bars: each one opens where the previous closed
                    interpolated.Open = previous.Close;
                }
                sampledSeries.Values.Add(interpolated);
            }
            else if (current.Time < nextSampleTime)
            {
                // not sub sampling: keep only the bar whose following sample time passes the current candlestick
                sampledSeries.Values.Add(interpolated);
            }
            previous = interpolated;

            // note: 'interpolated' may already be in the output list, the changes below still apply to it
            if (!aggregated)
            {
                // when subsampling, we build the high and low based on the open and close of the interpolated bar, not the bar we are sampling
                interpolated.High = interpolated.Close;
                interpolated.Low = interpolated.Close;
                if (interpolated.Open.HasValue)
                {
                    if (!interpolated.Close.HasValue || interpolated.Open > interpolated.Close.Value)
                    {
                        interpolated.High = interpolated.Open.Value;
                    }
                    if (!interpolated.Close.HasValue || interpolated.Open < interpolated.Close.Value)
                    {
                        interpolated.Low = interpolated.Open.Value;
                    }
                }
            }

            // keep emitting bars past the current candlestick while the bar has no open value and
            // the sample after the upcoming one still falls before the next candlestick
            if (next != null && (nextSampleTime + Step) < next.Time && interpolated.Open == null)
            {
                isNull = true;
            }
            else
            {
                isNull = false;
            }
        }
        while ((nextSampleTime <= current.Time || isNull) && nextSampleTime <= stop);

        // Update the start index: the next aggregation starts right after the candlestick just consumed
        startIndex = i + 1;
    }

    return sampledSeries;
}
331 
/// <summary>
/// Folds the candlesticks in the index range [start, end) into one candlestick stamped with the
/// given time, by feeding each candlestick's open, high, low and close, in that order, to
/// <c>Candlestick.Update</c>
/// </summary>
/// <param name="candlesticks">The source points, each expected to be a <see cref="Candlestick"/></param>
/// <param name="start">Index of the first candlestick to aggregate (inclusive)</param>
/// <param name="end">Index at which to stop aggregating (exclusive)</param>
/// <param name="time">The time assigned to the aggregated candlestick</param>
/// <param name="truncateValues">True will truncate values to integers</param>
/// <returns>The aggregated candlestick</returns>
private static Candlestick AggregateCandlesticks(List<ISeriesPoint> candlesticks, int start, int end, DateTime time, bool truncateValues)
{
    var merged = new Candlestick();
    merged.Time = time;

    var index = start;
    while (index < end)
    {
        var source = (Candlestick)candlesticks[index];
        merged.Update(source.Open);
        merged.Update(source.High);
        merged.Update(source.Low);
        merged.Update(source.Close);
        index++;
    }

    // the candlestick was created here, so truncating it in place cannot affect the source data
    var truncated = TruncateValue(merged, truncateValues, clone: false);
    return (Candlestick)truncated;
}
354 
/// <summary>
/// Linear interpolation used for sampling: evaluates at <paramref name="xTarget"/> the line that goes
/// through (<paramref name="x0"/>, <paramref name="y0"/>) and (<paramref name="x1"/>, <paramref name="y1"/>)
/// </summary>
/// <param name="x0">The x coordinate of the previous point</param>
/// <param name="y0">The value of the previous point, may be null</param>
/// <param name="x1">The x coordinate of the next point</param>
/// <param name="y1">The value of the next point, may be null</param>
/// <param name="xTarget">The x coordinate to interpolate at</param>
/// <param name="step">The sampling step, used as tolerance when one of the two values is missing</param>
/// <returns>
/// The interpolated value. When one of the values is missing, the other value is returned if the target
/// lies within one step of it, null otherwise. When both points share the same x, <paramref name="y1"/>
/// is returned.
/// </returns>
protected static decimal? Interpolate(decimal x0, decimal? y0, decimal x1, decimal? y1, decimal xTarget, decimal step)
{
    if (!y1.HasValue)
    {
        // if the next point isn't there we won't interpolate the value, it means it's the end,
        // unless the target time is the previous time or close to it
        return xTarget - x0 <= step ? y0 : null;
    }

    if (!y0.HasValue)
    {
        // if the previous value isn't there, return null unless we reach the target end time or close enough
        return x1 - xTarget <= step ? y1 : null;
    }

    if (x1 == x0)
    {
        // degenerate segment: the slope is undefined and dividing would throw, so use the latest
        // value, which is what the chart point overload does for points sharing the same x
        return y1;
    }

    // y=mx+b
    return (y1 - y0) * (xTarget - x0) / (x1 - x0) + y0;
}
383 
/// <summary>
/// Builds the chart point at <paramref name="targetTime"/> by linearly interpolating between two chart points
/// </summary>
/// <param name="previous">The chart point before the target</param>
/// <param name="current">The chart point after the target</param>
/// <param name="targetTime">The time of the point to produce</param>
/// <param name="step">The sampling step in seconds, forwarded to the numeric interpolation</param>
/// <returns>A new chart point, or a clone of <paramref name="current"/> when both points share the same x</returns>
private static ChartPoint Interpolate(ChartPoint previous, ChartPoint current, DateTime targetTime, decimal step)
{
    if (current.X != previous.X)
    {
        var targetSeconds = Time.DateTimeToUnixTimeStamp(targetTime).SafeDecimalCast();
        var interpolatedValue = Interpolate(previous.X, previous.Y, current.X, current.Y, targetSeconds, step);
        return new ChartPoint(targetTime, interpolatedValue);
    }

    // both points share the same x, there is no segment to interpolate along
    return (ChartPoint)current.Clone();
}
398 
/// <summary>
/// Builds the candlestick at <paramref name="targetTime"/>: a copy of <paramref name="template"/> whose
/// close is linearly interpolated between the open of <paramref name="first"/> (placed at
/// <paramref name="firstOpenTime"/>) and the close of <paramref name="current"/>
/// </summary>
/// <param name="template">The candlestick to copy the remaining values from</param>
/// <param name="first">The candlestick providing the starting open value</param>
/// <param name="current">The candlestick providing the ending close value and time</param>
/// <param name="firstOpenTime">The time associated with the starting open value</param>
/// <param name="targetTime">The time of the candlestick to produce</param>
/// <param name="step">The sampling step in seconds, forwarded to the numeric interpolation</param>
/// <returns>
/// A new candlestick stamped with the target time; a plain copy of <paramref name="current"/> when
/// <paramref name="firstOpenTime"/> equals its time
/// </returns>
private static Candlestick Interpolate(Candlestick template, Candlestick first, Candlestick current,
    DateTime firstOpenTime, DateTime targetTime, decimal step)
{
    // no time span to interpolate over: reuse the current candlestick as is
    var copyCurrent = firstOpenTime == current.Time;

    var result = (Candlestick)(copyCurrent ? current.Clone() : template.Clone());
    result.Time = targetTime;

    if (!copyCurrent)
    {
        var targetSeconds = Time.DateTimeToUnixTimeStamp(targetTime).SafeDecimalCast();
        var originSeconds = Time.DateTimeToUnixTimeStamp(firstOpenTime).SafeDecimalCast();
        result.Close = Interpolate(originSeconds, first.Open, current.LongTime, current.Close, targetSeconds, step);
    }

    return result;
}
422 
/// <summary>
/// Truncates the value/values of the point to integers when requested
/// </summary>
/// <param name="point">The point to truncate</param>
/// <param name="truncate">False returns the given point untouched</param>
/// <param name="clone">True truncates a clone so the given point is not mutated, false truncates it in place</param>
/// <returns>The given point, or its truncated clone</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ISeriesPoint TruncateValue(ISeriesPoint point, bool truncate, bool clone = false)
{
    if (truncate)
    {
        var target = clone ? point.Clone() : point;

        switch (target)
        {
            case ChartPoint chartPoint:
                chartPoint.y = SafeTruncate(chartPoint.y);
                break;

            case Candlestick candlestick:
                candlestick.Open = SafeTruncate(candlestick.Open);
                candlestick.High = SafeTruncate(candlestick.High);
                candlestick.Low = SafeTruncate(candlestick.Low);
                candlestick.Close = SafeTruncate(candlestick.Close);
                break;
        }

        // any other point type is handed back without changes
        return target;
    }

    return point;
}
450 
/// <summary>
/// Gets the identity series, this is the series with no sampling applied: every source point
/// whose time lies within [start, stop] is added to <paramref name="sampled"/>
/// </summary>
/// <param name="sampled">The series receiving the points</param>
/// <param name="series">The series providing the points</param>
/// <param name="start">The earliest point time to keep (inclusive)</param>
/// <param name="stop">The latest point time to keep (inclusive)</param>
/// <param name="truncateValues">True adds truncated clones of the points, false adds the source points themselves</param>
/// <returns>The <paramref name="sampled"/> series</returns>
protected static T GetIdentitySeries<T>(T sampled, T series, DateTime start, DateTime stop, bool truncateValues)
    where T : BaseSeries
{
    foreach (var candidate in series.Values)
    {
        // we can minimally verify we're within the start/stop interval
        if (candidate.Time < start || candidate.Time > stop)
        {
            continue;
        }

        var kept = TruncateValue(candidate, truncateValues, clone: true);
        sampled.Values.Add(kept);
    }

    return sampled;
}
467 
/// <summary>
/// Truncates the given value towards zero, passing null through
/// </summary>
/// <param name="value">The value to truncate, may be null</param>
/// <returns>The integral part of the value, or null when there is no value</returns>
private static decimal? SafeTruncate(decimal? value)
{
    return value.HasValue ? Math.Truncate(value.Value) : (decimal?)null;
}
476  }
477 }