Lean  $LEAN_TAG$
PandasData.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using Python.Runtime;
17 using QuantConnect.Data;
20 using QuantConnect.Util;
21 using System;
22 using System.Collections;
23 using System.Collections.Concurrent;
24 using System.Collections.Generic;
25 using System.Globalization;
26 using System.Linq;
27 using System.Reflection;
28 
29 namespace QuantConnect.Python
30 {
31  /// <summary>
32  /// Organizes a list of data to create pandas.DataFrames
33  /// </summary>
34  public class PandasData
35  {
// Trade columns (also used for the quote bar mid prices when no trade bar is supplied)
private const string Open = "open";
private const string High = "high";
private const string Low = "low";
private const string Close = "close";
private const string Volume = "volume";

// Ask side columns
private const string AskOpen = "askopen";
private const string AskHigh = "askhigh";
private const string AskLow = "asklow";
private const string AskClose = "askclose";
private const string AskPrice = "askprice";
private const string AskSize = "asksize";

// Bid side columns
private const string BidOpen = "bidopen";
private const string BidHigh = "bidhigh";
private const string BidLow = "bidlow";
private const string BidClose = "bidclose";
private const string BidPrice = "bidprice";
private const string BidSize = "bidsize";

// Tick specific columns
private const string LastPrice = "lastprice";
private const string Quantity = "quantity";
private const string Exchange = "exchange";
private const string Suspicious = "suspicious";
private const string OpenInterest = "openinterest";
61 
// Python objects are resolved once by the first constructor call and kept here,
// so we don't need to ask for them each time
private static PyString _empty;
private static PyObject _pandas;
private static PyObject _seriesFactory;
private static PyObject _dataFrameFactory;
private static PyObject _multiIndexFactory;

// Index level names for the 5, 2 and 3 level multi indexes respectively
private static PyList _defaultNames;
private static PyList _level2Names;
private static PyList _level3Names;

// Lower-cased names of the BaseData properties, excluded from the custom data columns
private static readonly HashSet<string> _baseDataProperties = typeof(BaseData)
    .GetProperties()
    .ToHashSet(property => property.Name.ToLowerInvariant());

// Reflected fields/properties per custom data type, so reflection runs once per type
private static readonly ConcurrentDictionary<Type, IEnumerable<MemberInfo>> _membersByType =
    new ConcurrentDictionary<Type, IEnumerable<MemberInfo>>();

// Columns every instance starts with, regardless of the data type
private static readonly IReadOnlyList<string> _standardColumns = new[]
{
    Open, High, Low, Close, LastPrice, Volume,
    AskOpen, AskHigh, AskLow, AskClose, AskPrice, AskSize, Quantity, Suspicious,
    BidOpen, BidHigh, BidLow, BidClose, BidPrice, BidSize, Exchange, OpenInterest
};

private readonly Symbol _symbol;
private readonly bool _isFundamentalType;
private readonly Dictionary<string, Serie> _series;

// Members read through reflection on every Add call; stays empty unless the constructor resolves them
private readonly IEnumerable<MemberInfo> _members = Enumerable.Empty<MemberInfo>();
87 
/// <summary>
/// True when the data type is declared outside the namespace of the Lean market data types
/// (a custom data request), false for normal QC data
/// </summary>
public bool IsCustomData { get; }

/// <summary>
/// Implied levels of a multi index pandas.Series: 2 by default, 3 for futures and 5 for options
/// </summary>
public int Levels { get; } = 2;
97 
/// <summary>
/// Initializes an instance of <see cref="PandasData"/>
/// </summary>
/// <param name="data">
/// An <see cref="IBaseData"/> instance, or a collection of them, in which case the first element
/// is used to determine the data type, the symbol and the columns
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="data"/> is null, an empty collection or otherwise does not resolve to an
/// <see cref="IBaseData"/> instance, and when a custom C# data type declares fields/properties whose
/// lower-cased names collide
/// </exception>
public PandasData(object data)
{
    if (_pandas == null)
    {
        using (Py.GIL())
        {
            // Use our PandasMapper class that modifies pandas indexing to support tickers, symbols and SIDs
            _pandas = Py.Import("PandasMapper");
            _seriesFactory = _pandas.GetAttr("Series");
            _dataFrameFactory = _pandas.GetAttr("DataFrame");
            using var multiIndex = _pandas.GetAttr("MultiIndex");
            _multiIndexFactory = multiIndex.GetAttr("from_tuples");
            _empty = new PyString(string.Empty);

            var time = new PyString("time");
            var symbol = new PyString("symbol");
            var expiry = new PyString("expiry");
            _defaultNames = new PyList(new PyObject[] { expiry, new PyString("strike"), new PyString("type"), symbol, time });
            _level2Names = new PyList(new PyObject[] { symbol, time });
            _level3Names = new PyList(new PyObject[] { expiry, symbol, time });
        }
    }

    // in the case we get a list/collection of data we take the first data point to determine the type
    // but it's also possible to get a data which supports enumerating we don't care about those cases
    if (data is not IBaseData && data is IEnumerable enumerable)
    {
        foreach (var item in enumerable)
        {
            data = item;
            break;
        }
    }

    // null, an empty collection (data is still the collection itself) or a foreign type: fail with a
    // clear message instead of a NullReferenceException/InvalidCastException further down
    if (data is not IBaseData baseData)
    {
        var received = data == null ? "null" : data.GetType().FullName;
        throw new ArgumentException(
            $"PandasData.ctor(): expected an IBaseData instance or a non-empty collection of IBaseData, but received: {received}",
            nameof(data));
    }

    var type = baseData.GetType();
    _isFundamentalType = type == typeof(Fundamental);
    IsCustomData = type.Namespace != typeof(Bar).Namespace;
    _symbol = baseData.Symbol;

    if (_symbol.SecurityType == SecurityType.Future) Levels = 3;
    if (_symbol.SecurityType.IsOption()) Levels = 5;

    IEnumerable<string> columns = _standardColumns;

    if (IsCustomData || baseData.DataType == MarketDataType.Auxiliary)
    {
        var keys = (data as DynamicData)?.GetStorageDictionary()
            // if this is a PythonData instance we add in '__typename' which we don't want into the data frame
            .Where(x => !x.Key.StartsWith("__", StringComparison.InvariantCulture)).ToHashSet(x => x.Key);

        // C# types that are not DynamicData type
        if (keys == null)
        {
            if (_membersByType.TryGetValue(type, out _members))
            {
                // members were already resolved for this type, reuse them
                keys = _members.ToHashSet(x => x.Name.ToLowerInvariant());
            }
            else
            {
                var members = type.GetMembers().Where(x => x.MemberType == MemberTypes.Field || x.MemberType == MemberTypes.Property).ToList();

                // columns are keyed by lower-cased member name, so two members can't share one
                var duplicateKey = members.GroupBy(x => x.Name.ToLowerInvariant())
                    .Where(x => x.Count() > 1)
                    .Select(x => x.Key)
                    .FirstOrDefault();
                if (duplicateKey != null)
                {
                    throw new ArgumentException($"PandasData.ctor(): {Messages.PandasData.DuplicateKey(duplicateKey, type.FullName)}");
                }

                // If the custom data derives from a Market Data (e.g. Tick, TradeBar, QuoteBar), exclude its keys
                keys = members.ToHashSet(x => x.Name.ToLowerInvariant());
                keys.ExceptWith(_baseDataProperties);
                keys.ExceptWith(GetPropertiesNames(typeof(QuoteBar), type));
                keys.ExceptWith(GetPropertiesNames(typeof(TradeBar), type));
                keys.ExceptWith(GetPropertiesNames(typeof(Tick), type));
                // 'value' is a BaseData property removed above, but we do want it as a column
                keys.Add("value");

                _members = members.Where(x => keys.Contains(x.Name.ToLowerInvariant())).ToList();
                _membersByType.TryAdd(type, _members);
            }
        }

        var customColumns = new HashSet<string>(columns);
        customColumns.Add("value");
        customColumns.UnionWith(keys);

        columns = customColumns;
    }

    // one empty series per column
    _series = columns.ToDictionary(k => k, v => new Serie());
}
190 
/// <summary>
/// Appends a data point to the series: first the reflected members resolved by the constructor,
/// then either the dynamic storage entries or the tick/bar columns
/// </summary>
/// <param name="baseData"><see cref="IBaseData"/> object that contains security data</param>
public void Add(object baseData)
{
    var data = (IBaseData)baseData;
    var endTime = data.EndTime;

    // TODO field/property.GetValue is expensive
    foreach (var member in _members)
    {
        var column = member.Name.ToLowerInvariant();
        if (member is PropertyInfo property)
        {
            var propertyValue = property.GetValue(baseData);
            if (_isFundamentalType && property.PropertyType.IsAssignableTo(typeof(FundamentalTimeDependentProperty)))
            {
                // store a clone bound to a time provider fixed at this data point's end time
                propertyValue = ((FundamentalTimeDependentProperty)propertyValue).Clone(new FixedTimeProvider(endTime));
            }
            AddToSeries(column, endTime, propertyValue);
        }
        else if (member is FieldInfo field)
        {
            AddToSeries(column, endTime, field.GetValue(baseData));
        }
    }

    var storage = (baseData as DynamicData)?.GetStorageDictionary();
    if (storage == null)
    {
        // not dynamic data: fall back to the tick or bar columns
        if (baseData is Tick tick)
        {
            AddTick(tick);
        }
        else
        {
            Add(baseData as TradeBar, baseData as QuoteBar);
        }
        return;
    }

    AddToSeries("value", endTime, data.Value);

    foreach (var kvp in storage)
    {
        // 'value' was added above; if this is a PythonData instance we add in '__typename'
        // which we don't want into the data frame
        if (kvp.Key == "value" || kvp.Key.StartsWith("__", StringComparison.InvariantCulture))
        {
            continue;
        }
        AddToSeries(kvp.Key, endTime, kvp.Value);
    }
}
251 
/// <summary>
/// Appends the trade and/or quote bar values to their columns. Either argument may be null.
/// </summary>
/// <param name="tradeBar"><see cref="TradeBar"/> object that contains trade bar information of the security</param>
/// <param name="quoteBar"><see cref="QuoteBar"/> object that contains quote bar information of the security</param>
public void Add(TradeBar tradeBar, QuoteBar quoteBar)
{
    if (tradeBar != null)
    {
        var tradeTime = tradeBar.EndTime;
        GetSerie(Open).Add(tradeTime, tradeBar.Open);
        GetSerie(High).Add(tradeTime, tradeBar.High);
        GetSerie(Low).Add(tradeTime, tradeBar.Low);
        GetSerie(Close).Add(tradeTime, tradeBar.Close);
        GetSerie(Volume).Add(tradeTime, tradeBar.Volume);
    }

    if (quoteBar == null)
    {
        return;
    }

    var quoteTime = quoteBar.EndTime;

    // the quote bar prices only fill the OHLC columns when there's no trade bar to do so
    if (tradeBar == null)
    {
        GetSerie(Open).Add(quoteTime, quoteBar.Open);
        GetSerie(High).Add(quoteTime, quoteBar.High);
        GetSerie(Low).Add(quoteTime, quoteBar.Low);
        GetSerie(Close).Add(quoteTime, quoteBar.Close);
    }

    var ask = quoteBar.Ask;
    if (ask != null)
    {
        GetSerie(AskOpen).Add(quoteTime, ask.Open);
        GetSerie(AskHigh).Add(quoteTime, ask.High);
        GetSerie(AskLow).Add(quoteTime, ask.Low);
        GetSerie(AskClose).Add(quoteTime, ask.Close);
        GetSerie(AskSize).Add(quoteTime, quoteBar.LastAskSize);
    }

    var bid = quoteBar.Bid;
    if (bid != null)
    {
        GetSerie(BidOpen).Add(quoteTime, bid.Open);
        GetSerie(BidHigh).Add(quoteTime, bid.High);
        GetSerie(BidLow).Add(quoteTime, bid.Low);
        GetSerie(BidClose).Add(quoteTime, bid.Close);
        GetSerie(BidSize).Add(quoteTime, quoteBar.LastBidSize);
    }
}
296 
/// <summary>
/// Appends a tick to the tick columns. Columns that don't apply to the tick type receive null,
/// so that the indices are the same for every tick series.
/// </summary>
/// <param name="tick"><see cref="Tick"/> object that contains tick information of the security</param>
public void AddTick(Tick tick)
{
    var time = tick.EndTime;

    if (tick.TickType != TickType.Quote)
    {
        // Trade and open interest ticks don't have these values, so we'll fill them with null.
        GetSerie(AskPrice).Add(time, null);
        GetSerie(AskSize).Add(time, null);
        GetSerie(BidPrice).Add(time, null);
        GetSerie(BidSize).Add(time, null);
    }
    else
    {
        GetSerie(AskPrice).Add(time, tick.AskPrice);
        GetSerie(AskSize).Add(time, tick.AskSize);
        GetSerie(BidPrice).Add(time, tick.BidPrice);
        GetSerie(BidSize).Add(time, tick.BidSize);
    }

    GetSerie(Exchange).Add(time, tick.Exchange);
    GetSerie(Suspicious).Add(time, tick.Suspicious);
    GetSerie(Quantity).Add(time, tick.Quantity);

    // the tick value goes to the open interest column for open interest ticks and to
    // the last price column otherwise; the other column of the pair gets null
    var isOpenInterest = tick.TickType == TickType.OpenInterest;
    var valueColumn = isOpenInterest ? OpenInterest : LastPrice;
    var nullColumn = isOpenInterest ? LastPrice : OpenInterest;
    GetSerie(valueColumn).Add(time, tick.Value);
    GetSerie(nullColumn).Add(time, null);
}
339 
/// <summary>
/// Builds the pandas.DataFrame for the data added so far. The stored series are cleared in the process.
/// </summary>
/// <param name="levels">Number of levels of the multi index</param>
/// <returns>pandas.DataFrame object</returns>
public PyObject ToPandasDataFrame(int levels = 2)
{
    var symbol = _symbol.ID.ToString().ToPython();

    // Create the index labels, the last position is a placeholder replaced by each time stamp
    PyList names;
    List<PyObject> list;
    switch (levels)
    {
        case 2:
            // symbol, time
            names = _level2Names;
            list = new List<PyObject> { symbol, _empty };
            break;

        case 3:
            // expiry, symbol, time
            names = _level3Names;
            list = new List<PyObject> { _symbol.ID.Date.ToPython(), symbol, _empty };
            break;

        default:
            // expiry, strike, type, symbol, time
            names = _defaultNames;
            list = new List<PyObject> { _empty, _empty, _empty, symbol, _empty };
            if (_symbol.SecurityType == SecurityType.Future)
            {
                list[0] = _symbol.ID.Date.ToPython();
            }
            else if (_symbol.SecurityType.IsOption())
            {
                list[0] = _symbol.ID.Date.ToPython();
                list[1] = _symbol.ID.StrikePrice.ToPython();
                list[2] = _symbol.ID.OptionRight.ToString().ToPython();
            }
            break;
    }

    // creating the pandas MultiIndex is expensive so we keep a cache
    var indexCache = new Dictionary<List<DateTime>, PyObject>(new ListComparer<DateTime>());
    // Dictionary keyed by column name where values are pandas.Series objects
    using var pyDict = new PyDict();
    foreach (var column in _series)
    {
        var serie = column.Value;
        if (serie.ShouldFilter)
        {
            continue;
        }

        var times = serie.Times;
        if (!indexCache.TryGetValue(times, out var index))
        {
            using var tuples = times.Select(time => CreateTupleIndex(time, list)).ToPyListUnSafe();
            using var namesDic = Py.kw("names", names);

            index = _multiIndexFactory.Invoke(new[] { tuples }, namesDic);
            indexCache[times] = index;

            foreach (var tuple in tuples)
            {
                tuple.Dispose();
            }
        }

        // Adds pandas.Series value keyed by the column name
        using var pyvalues = new PyList();
        foreach (var value in serie.Values)
        {
            using var pyValue = value.ToPython();
            pyvalues.Append(pyValue);
        }
        using var series = _seriesFactory.Invoke(pyvalues, index);
        pyDict.SetItem(column.Key, series);
    }
    _series.Clear();

    foreach (var cachedIndex in indexCache.Values)
    {
        cachedIndex.Dispose();
    }

    foreach (var label in list)
    {
        DisposeIfNotEmpty(label);
    }

    // Create the DataFrame
    var dataFrame = _dataFrameFactory.Invoke(pyDict);

    foreach (var item in pyDict)
    {
        item.Dispose();
    }

    return dataFrame;
}
431 
/// <summary>
/// Disposes the given PyObject unless it is the shared empty string instance
/// </summary>
private static void DisposeIfNotEmpty(PyObject pyObject)
{
    if (ReferenceEquals(pyObject, _empty))
    {
        return;
    }
    pyObject.Dispose();
}
442 
/// <summary>
/// Replaces the last entry of the list with the given time and returns a new tuple built from the list
/// </summary>
private static PyTuple CreateTupleIndex(DateTime index, List<PyObject> list)
{
    var last = list.Count - 1;
    // release the previous entry before overwriting it
    DisposeIfNotEmpty(list[last]);
    list[last] = index.ToPython();
    return new PyTuple(list.ToArray());
}
452 
/// <summary>
/// Appends a value to the series of the given column
/// </summary>
/// <param name="key">The column name of the series to add to</param>
/// <param name="time"><see cref="DateTime"/> the value is associated with</param>
/// <param name="input"><see cref="Object"/> value to append. Can be null.</param>
private void AddToSeries(string key, DateTime time, object input) => GetSerie(key).Add(time, input);
464 
/// <summary>
/// Gets the series of the given column, throwing an <see cref="ArgumentException"/> if there is none
/// </summary>
private Serie GetSerie(string key)
{
    if (_series.TryGetValue(key, out var serie))
    {
        return serie;
    }
    throw new ArgumentException($"PandasData.GetSerie(): {Messages.PandasData.KeyNotFoundInSeries(key)}");
}
473 
/// <summary>
/// Gets the lower-invariant names of the properties of a base type, provided another type is assignable to it
/// </summary>
/// <param name="baseType">The type whose property names are returned</param>
/// <param name="type">The type that must be assignable to <paramref name="baseType"/></param>
/// <returns>The lower-cased property names. Empty if <paramref name="type"/> is not assignable to <paramref name="baseType"/></returns>
private static IEnumerable<string> GetPropertiesNames(Type baseType, Type type)
{
    if (!baseType.IsAssignableFrom(type))
    {
        return Enumerable.Empty<string>();
    }
    return baseType.GetProperties().Select(property => property.Name.ToLowerInvariant());
}
486 
/// <summary>
/// A column of the data frame: parallel lists of times and values, plus a flag telling
/// whether the column has only received empty values so far and should be left out
/// </summary>
private class Serie
{
    private static readonly IFormatProvider InvariantCulture = CultureInfo.InvariantCulture;

    /// <summary>
    /// True until at least one valid entry is added
    /// </summary>
    public bool ShouldFilter { get; set; } = true;

    public List<DateTime> Times { get; set; } = new List<DateTime>();

    public List<object> Values { get; set; } = new List<object>();

    /// <summary>
    /// Appends a value of any type. Decimals are stored as doubles.
    /// </summary>
    public void Add(DateTime time, object input)
    {
        var value = input is decimal ? Convert.ToDouble(input, InvariantCulture) : input;

        // we need at least 1 valid entry for the series not to get filtered
        if (ShouldFilter && IsValidEntry(value))
        {
            ShouldFilter = false;
        }

        Values.Add(value);
        Times.Add(time);
    }

    /// <summary>
    /// Appends a decimal value, stored as a double
    /// </summary>
    public void Add(DateTime time, decimal input)
    {
        var value = Convert.ToDouble(input, InvariantCulture);
        if (ShouldFilter && !value.IsNaNOrZero())
        {
            ShouldFilter = false;
        }

        Values.Add(value);
        Times.Add(time);
    }

    /// <summary>
    /// Doubles must not be NaN or zero, strings must not be null or whitespace, booleans must be true
    /// and any other value must not be null
    /// </summary>
    private static bool IsValidEntry(object value) => value switch
    {
        double number => !number.IsNaNOrZero(),
        string text => !string.IsNullOrWhiteSpace(text),
        bool flag => flag,
        _ => value != null
    };
}
543 
/// <summary>
/// Time provider that always returns the time it was created with
/// </summary>
private class FixedTimeProvider : ITimeProvider
{
    private readonly DateTime _time;

    public FixedTimeProvider(DateTime time)
    {
        _time = time;
    }

    public DateTime GetUtcNow()
    {
        return _time;
    }
}
553  }
554 }