Lean  $LEAN_TAG$
PandasConverter.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using Python.Runtime;
17 using QuantConnect.Data;
20 using QuantConnect.Util;
21 using System;
22 using System.Collections.Generic;
23 using System.Linq;
24 
25 namespace QuantConnect.Python
26 {
/// <summary>
/// Collection of methods that convert lists of objects into a pandas.DataFrame
/// </summary>
/// <remarks>
/// The imported pandas module and its <c>concat</c> function are stored in static fields,
/// so they are resolved by the first instance that gets created and reused afterwards.
/// </remarks>
public class PandasConverter
{
    // Imported pandas module; null until the first instance has been constructed
    private static dynamic _pandas;

    // pandas.concat, kept so it does not need to be looked up on every conversion
    private static PyObject _concat;

    /// <summary>
    /// Creates an instance of <see cref="PandasConverter"/>.
    /// </summary>
    /// <remarks>Imports the pandas module if it has not been imported yet</remarks>
    public PandasConverter()
    {
        if (_pandas == null)
        {
            using (Py.GIL())
            {
                var pandas = Py.Import("pandas");
                _pandas = pandas;
                // keep it so we don't need to ask for it each time
                _concat = pandas.GetAttr("concat");
            }
        }
    }

    /// <summary>
    /// Converts an enumerable of <see cref="Slice"/> in a pandas.DataFrame
    /// </summary>
    /// <param name="data">Enumerable of <see cref="Slice"/></param>
    /// <param name="dataType">Optional type of bars to add to the data frame</param>
    /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
    public PyObject GetDataFrame(IEnumerable<Slice> data, Type dataType = null)
    {
        var maxLevels = 0;
        var sliceDataDict = new Dictionary<SecurityIdentifier, PandasData>();

        // if no data type is requested we check all
        var requestedTick = dataType == null || dataType == typeof(Tick) || dataType == typeof(OpenInterest);
        var requestedTradeBar = dataType == null || dataType == typeof(TradeBar);
        var requestedQuoteBar = dataType == null || dataType == typeof(QuoteBar);

        // the slices are consumed before taking the GIL, only the conversion to pandas needs it
        foreach (var slice in data)
        {
            AddSliceDataTypeDataToDict(slice, requestedTick, requestedTradeBar, requestedQuoteBar, sliceDataDict, ref maxLevels);
        }

        using (Py.GIL())
        {
            // nothing was collected: return an empty pandas.DataFrame
            if (sliceDataDict.Count == 0)
            {
                return _pandas.DataFrame();
            }

            // one data frame per security identifier, concatenated into a single one
            using var dataFrames = sliceDataDict.Select(x => x.Value.ToPandasDataFrame(maxLevels)).ToPyListUnSafe();
            using var sortDic = Py.kw("sort", true);
            var result = _concat.Invoke(new[] { dataFrames }, sortDic);

            // the individual frames are no longer needed once they have been concatenated
            foreach (var df in dataFrames)
            {
                df.Dispose();
            }
            return result;
        }
    }

    /// <summary>
    /// Converts an enumerable of <see cref="IBaseData"/> in a pandas.DataFrame
    /// </summary>
    /// <typeparam name="T">Type of the data points, which must implement <see cref="IBaseData"/></typeparam>
    /// <param name="data">Enumerable of <see cref="IBaseData"/></param>
    /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
    /// <remarks>Helper method for testing</remarks>
    public PyObject GetDataFrame<T>(IEnumerable<T> data)
        where T : IBaseData
    {
        PandasData sliceData = null;
        foreach (var datum in data)
        {
            // the first data point is used to create the PandasData instance
            sliceData ??= new PandasData(datum);
            sliceData.Add(datum);
        }

        using (Py.GIL())
        {
            // If sliceData is still null, data is an empty enumerable
            // returns an empty pandas.DataFrame
            if (sliceData == null)
            {
                return _pandas.DataFrame();
            }
            return sliceData.ToPandasDataFrame();
        }
    }

    /// <summary>
    /// Converts a dictionary with a list of <see cref="IndicatorDataPoint"/> in a pandas.DataFrame
    /// </summary>
    /// <param name="data">Dictionary with a list of <see cref="IndicatorDataPoint"/></param>
    /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
    public PyObject GetIndicatorDataFrame(IDictionary<string, List<IndicatorDataPoint>> data)
    {
        using (Py.GIL())
        {
            // intermediate dictionary of series: released once the data frame has been built,
            // the same way the PyObject overload handles its own dictionary
            using var pyDict = new PyDict();

            foreach (var kvp in data)
            {
                AddSeriesToPyDict(kvp.Key, kvp.Value, pyDict);
            }

            return MakeIndicatorDataFrame(pyDict);
        }
    }

    /// <summary>
    /// Converts a dictionary with a list of <see cref="IndicatorDataPoint"/> in a pandas.DataFrame
    /// </summary>
    /// <param name="data"><see cref="PyObject"/> that should be a dictionary (convertible to PyDict) of string to list of <see cref="IndicatorDataPoint"/></param>
    /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when <paramref name="data"/> or any of its items cannot be converted. The original exception is kept as inner exception
    /// </exception>
    public PyObject GetIndicatorDataFrame(PyObject data)
    {
        using (Py.GIL())
        {
            using var inputPythonType = data.GetPythonType();

            // both strings are only used to build the error message and get refined as the conversion advances
            var inputTypeStr = inputPythonType.ToString();
            var targetTypeStr = nameof(PyDict);
            PyObject currentKvp = null;

            try
            {
                using var pyDictData = new PyDict(data);
                using var seriesPyDict = new PyDict();

                // the input is a dictionary: from now on a failure comes from one of its items
                targetTypeStr = $"{nameof(String)}: {nameof(List<IndicatorDataPoint>)}";

                foreach (var kvp in pyDictData.Items())
                {
                    // remember the item being converted so it can be reported if the conversion fails
                    currentKvp = kvp;
                    AddSeriesToPyDict(kvp[0].As<string>(), kvp[1].As<List<IndicatorDataPoint>>(), seriesPyDict);
                }

                return MakeIndicatorDataFrame(seriesPyDict);
            }
            catch (Exception e)
            {
                if (currentKvp != null)
                {
                    // report the python types of the offending key and value instead of the container type
                    inputTypeStr = $"{currentKvp[0].GetPythonType()}: {currentKvp[1].GetPythonType()}";
                }

                throw new ArgumentException(Messages.PandasConverter.ConvertToDictionaryFailed(inputTypeStr, targetTypeStr, e.Message), e);
            }
        }
    }

    /// <summary>
    /// Returns a string that represent the current object
    /// </summary>
    /// <returns>
    /// The representation of the pandas module, or a message stating that it has not been imported
    /// </returns>
    public override string ToString()
    {
        if (_pandas == null)
        {
            return Messages.PandasConverter.PandasModuleNotImported;
        }

        // Repr() calls into the Python runtime, so the GIL is taken here
        // as in every other method of this class that talks to Python
        using (Py.GIL())
        {
            return _pandas.Repr();
        }
    }

    /// <summary>
    /// Creates a series from a list of <see cref="IndicatorDataPoint"/> and adds it to the
    /// <see cref="PyDict"/> as the value of the given <paramref name="key"/>
    /// </summary>
    /// <param name="key">Key to insert in the <see cref="PyDict"/>. It is stored in lower case</param>
    /// <param name="points">List of <see cref="IndicatorDataPoint"/> that will make up the resulting series</param>
    /// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>
    private void AddSeriesToPyDict(string key, List<IndicatorDataPoint> points, PyDict pyDict)
    {
        // the final size is known, so both lists are allocated only once
        var index = new List<DateTime>(points.Count);
        var values = new List<double>(points.Count);

        foreach (var point in points)
        {
            // the series is indexed by the end time of each point
            index.Add(point.EndTime);
            values.Add((double) point.Value);
        }
        pyDict.SetItem(key.ToLowerInvariant(), _pandas.Series(values, index));
    }

    /// <summary>
    /// Converts a <see cref="PyDict"/> of string to pandas.Series in a pandas.DataFrame
    /// </summary>
    /// <param name="pyDict"><see cref="PyDict"/> of string to pandas.Series</param>
    /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
    private PyObject MakeIndicatorDataFrame(PyDict pyDict)
    {
        // columns are the lower case keys in ascending order
        var columns = pyDict.Keys()
            .Select(x => x.As<string>().ToLowerInvariant())
            .OrderBy(x => x);

        return _pandas.DataFrame(pyDict, columns: columns);
    }

    /// <summary>
    /// Gets the <see cref="PandasData"/> for the given symbol if it exists in the dictionary, otherwise it creates a new instance with the
    /// given base data and adds it to the dictionary
    /// </summary>
    /// <param name="sliceDataDict">Dictionary of <see cref="PandasData"/> keyed by security identifier</param>
    /// <param name="symbol">Symbol whose identifier is used as key</param>
    /// <param name="data">Data used to create the <see cref="PandasData"/> when there is none for the symbol</param>
    /// <param name="maxLevels">Highest number of levels among the created instances, updated when a new one is added</param>
    /// <returns>The existing or newly created <see cref="PandasData"/></returns>
    private PandasData GetPandasDataValue(IDictionary<SecurityIdentifier, PandasData> sliceDataDict, Symbol symbol, object data, ref int maxLevels)
    {
        if (!sliceDataDict.TryGetValue(symbol.ID, out var value))
        {
            value = new PandasData(data);
            sliceDataDict[symbol.ID] = value;
            maxLevels = Math.Max(maxLevels, value.Levels);
        }

        return value;
    }

    /// <summary>
    /// Adds each slice data corresponding to the requested data type to the pandas data dictionary
    /// </summary>
    /// <param name="slice">Slice whose data will be added</param>
    /// <param name="requestedTick">True if ticks should be added</param>
    /// <param name="requestedTradeBar">True if trade bars should be added</param>
    /// <param name="requestedQuoteBar">True if quote bars should be added</param>
    /// <param name="sliceDataDict">Dictionary of <see cref="PandasData"/> keyed by security identifier</param>
    /// <param name="maxLevels">Highest number of levels among the <see cref="PandasData"/> instances created so far</param>
    private void AddSliceDataTypeDataToDict(Slice slice, bool requestedTick, bool requestedTradeBar, bool requestedQuoteBar, IDictionary<SecurityIdentifier, PandasData> sliceDataDict, ref int maxLevels)
    {
        // identifiers already handled in this slice, only allocated when it is needed
        HashSet<SecurityIdentifier> addedData = null;

        for (int i = 0; i < slice.AllData.Count; i++)
        {
            var baseData = slice.AllData[i];
            var value = GetPandasDataValue(sliceDataDict, baseData.Symbol, baseData, ref maxLevels);

            if (value.IsCustomData)
            {
                value.Add(baseData);
                continue;
            }

            var tick = requestedTick ? baseData as Tick : null;
            if (tick != null)
            {
                value.AddTick(tick);
                continue;
            }

            // we add both quote and trade bars for each symbol at the same time, because they share the row in the data frame else it will generate 2 rows per series
            if (requestedTradeBar && requestedQuoteBar)
            {
                addedData ??= new();
                if (!addedData.Add(baseData.Symbol.ID))
                {
                    // the bars of this symbol were already added for this slice
                    continue;
                }
            }

            // the slice already has the data organized by symbol so let's take advantage of it using Bars/QuoteBars collections
            QuoteBar quoteBar = null;
            var tradeBar = requestedTradeBar ? baseData as TradeBar : null;
            if (tradeBar != null)
            {
                // look for the quote bar of the same symbol, quoteBar stays null if there is none
                slice.QuoteBars.TryGetValue(tradeBar.Symbol, out quoteBar);
            }
            else
            {
                quoteBar = requestedQuoteBar ? baseData as QuoteBar : null;
                if (quoteBar != null)
                {
                    // look for the trade bar of the same symbol, tradeBar stays null if there is none
                    slice.Bars.TryGetValue(quoteBar.Symbol, out tradeBar);
                }
            }
            value.Add(tradeBar, quoteBar);
        }
    }
}
297 }