Lean  $LEAN_TAG$
Compression.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Collections.Generic;
18 using System.Diagnostics;
19 using System.IO;
20 using System.IO.Compression;
21 using System.Linq;
22 using System.Text;
23 using System.Threading.Tasks;
24 using ICSharpCode.SharpZipLib.Core;
25 using ICSharpCode.SharpZipLib.GZip;
26 using ICSharpCode.SharpZipLib.Tar;
27 using Ionic.Zip;
28 using QuantConnect.Logging;
29 using ZipEntry = ICSharpCode.SharpZipLib.Zip.ZipEntry;
30 using ZipFile = Ionic.Zip.ZipFile;
31 using ZipInputStream = ICSharpCode.SharpZipLib.Zip.ZipInputStream;
32 using ZipOutputStream = ICSharpCode.SharpZipLib.Zip.ZipOutputStream;
33 
34 namespace QuantConnect
35 {
36  /// <summary>
37  /// Compression class manages the opening and extraction of compressed files (zip, tar, tar.gz).
38  /// </summary>
39  /// <remarks>QuantConnect's data library is stored in zip format locally on the hard drive.</remarks>
40  public static class Compression
41  {
/// <summary>
/// Flag telling whether the current platform id is one of the values this class treats as Linux-like
/// </summary>
/// <remarks>True when the numeric value of <see cref="Environment.OSVersion"/>'s platform is 4, 6 or 128.</remarks>
private static bool IsLinux => (int)Environment.OSVersion.Platform is 4 or 6 or 128;
53 
/// <summary>
/// Writes every name/text pair of the dictionary as an entry of a new zip file
/// </summary>
/// <param name="zipPath">Path of the zip file to create.</param>
/// <param name="filenamesAndData">Entry names mapped to the text stored under each name.</param>
/// <returns>True when the zip was written, false if an exception was raised (it is logged).</returns>
public static bool ZipData(string zipPath, Dictionary<string, string> filenamesAndData)
{
    try
    {
        using var archive = new ZipOutputStream(File.Create(zipPath));
        archive.SetLevel(0);

        foreach (var (entryName, text) in filenamesAndData)
        {
            // Open the entry, push the encoded text and close the entry again
            var payload = Encoding.Default.GetBytes(text);
            archive.PutNextEntry(new ZipEntry(entryName));
            archive.Write(payload, 0, payload.Length);
            archive.CloseEntry();
        }

        // Finalize the archive explicitly before the stream is disposed
        archive.Finish();
        archive.Close();
        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
91 
/// <summary>
/// Writes every name/bytes pair as an entry of a new zip file
/// </summary>
/// <param name="zipPath">Path of the zip file to create.</param>
/// <param name="filenamesAndData">Entry names paired with the raw bytes stored under each name.</param>
/// <returns>True when the zip was written, false if an exception was raised (it is logged).</returns>
public static bool ZipData(string zipPath, IEnumerable<KeyValuePair<string, byte[]>> filenamesAndData)
{
    // Scratch buffer shared by all entries
    var chunk = new byte[4096];

    try
    {
        using var archive = new ZipOutputStream(File.Create(zipPath));

        foreach (var file in filenamesAndData)
        {
            archive.PutNextEntry(new ZipEntry(file.Key));

            // Feed the entry bytes to the archive in buffer sized pieces
            using var source = new MemoryStream(file.Value);
            int read;
            while ((read = source.Read(chunk, 0, chunk.Length)) > 0)
            {
                archive.Write(chunk, 0, read);
            }
        }

        // Finalize the archive explicitly before the stream is disposed
        archive.Finish();
        archive.Close();
        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
139 
/// <summary>
/// Creates a zip file holding a single entry made of the supplied lines of text
/// </summary>
/// <param name="zipPath">The destination zip file path</param>
/// <param name="zipEntry">The entry name in the zip</param>
/// <param name="lines">The lines to be written, one per line, into the entry</param>
/// <returns>True if successful, false if an exception was raised (it is logged)</returns>
public static bool ZipData(string zipPath, string zipEntry, IEnumerable<string> lines)
{
    try
    {
        // Declared in this order so the writer is flushed and disposed before the zip stream
        using var archive = new ZipOutputStream(File.Create(zipPath));
        using var textWriter = new StreamWriter(archive);

        archive.PutNextEntry(new ZipEntry(zipEntry));
        foreach (var text in lines)
        {
            textWriter.WriteLine(text);
        }
        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
169 
/// <summary>
/// Adds a text entry to the zip file at the given path, creating the zip when the file does not exist yet.
/// </summary>
/// <param name="path">The zip file path</param>
/// <param name="entry">The entry name</param>
/// <param name="data">The entry data</param>
/// <param name="overrideEntry">True to remove an already existing entry with the same name before adding the new one</param>
/// <returns>True on success, false if any exception was raised (the exception is logged)</returns>
public static bool ZipCreateAppendData(string path, string entry, string data, bool overrideEntry = false)
{
    try
    {
        using (var zip = File.Exists(path) ? ZipFile.Read(path) : new ZipFile(path))
        {
            // Check the flag first so the entry lookup is skipped when not overriding
            if (overrideEntry && zip.ContainsEntry(entry))
            {
                zip.RemoveEntry(entry);
            }

            zip.AddEntry(entry, data);
            zip.UseZip64WhenSaving = Zip64Option.Always;
            zip.Save();
        }
    }
    catch (Exception err)
    {
        // Include the file and entry in the log, as the byte[] overload does, so failures can be traced
        Log.Error(err, $"file: {path} entry: {entry}");
        return false;
    }
    return true;
}
201 
/// <summary>
/// Adds a binary entry to the zip file at the given path, creating the zip when the file does not exist yet.
/// </summary>
/// <param name="path">The zip file path</param>
/// <param name="entry">The entry name</param>
/// <param name="data">The entry data</param>
/// <param name="overrideEntry">True to remove an already existing entry with the same name before adding the new one</param>
/// <returns>True on success, false if any exception was raised (the exception is logged with the file and entry)</returns>
public static bool ZipCreateAppendData(string path, string entry, byte[] data, bool overrideEntry = false)
{
    try
    {
        using var archive = File.Exists(path) ? ZipFile.Read(path) : new ZipFile(path);

        var replaceExisting = overrideEntry && archive.ContainsEntry(entry);
        if (replaceExisting)
        {
            archive.RemoveEntry(entry);
        }

        archive.AddEntry(entry, data);
        archive.UseZip64WhenSaving = Zip64Option.Always;
        archive.Save();
        return true;
    }
    catch (Exception err)
    {
        Log.Error(err, $"file: {path} entry: {entry}");
        return false;
    }
}
233 
/// <summary>
/// Synchronously unzips an in-memory zip into a dictionary keyed by entry name with the entry text as value.
/// </summary>
/// <param name="zipData">Byte array holding the zip compressed information</param>
/// <param name="encoding">Encoding used to turn the entry bytes into text; forwarded as is to <see cref="UnzipDataAsync"/></param>
/// <returns>Dictionary of entry name to entry contents</returns>
public static Dictionary<string, string> UnzipData(byte[] zipData, Encoding encoding = null)
{
    using (var zipBytes = new MemoryStream(zipData))
    {
        // Block on the async implementation
        var pending = UnzipDataAsync(zipBytes, encoding);
        return pending.ConfigureAwait(false).GetAwaiter().GetResult();
    }
}
245 
/// <summary>
/// Uncompress a zip stream into a dictionary of entry name to entry contents.
/// </summary>
/// <remarks>
/// Exceptions are logged and not rethrown; in that case the entries read so far are returned.
/// </remarks>
/// <param name="stream">Stream data of zip compressed information</param>
/// <param name="encoding">Specifies the encoding used to read the bytes. If not specified, defaults to ASCII</param>
/// <returns>Uncompressed dictionary string-string of files in the zip</returns>
public static async Task<Dictionary<string, string>> UnzipDataAsync(Stream stream, Encoding encoding = null)
{
    var data = new Dictionary<string, string>();
    var textEncoding = encoding ?? Encoding.ASCII;

    try
    {
        using (var zipStream = new ZipInputStream(stream))
        {
            ZipEntry entry;
            while ((entry = zipStream.GetNextEntry()) != null)
            {
                // Copy until the entry is exhausted: a single read may return fewer bytes than requested,
                // and the declared entry size is not always known up front.
                using (var contents = new MemoryStream())
                {
                    await zipStream.CopyToAsync(contents).ConfigureAwait(false);
                    data[entry.Name] = textEncoding.GetString(contents.GetBuffer(), 0, (int)contents.Length);
                }
            }
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
    return data;
}
291 
/// <summary>
/// Zips the given bytes in memory as a single entry and returns the resulting zip
/// </summary>
/// <param name="bytes">The file contents in bytes to be zipped</param>
/// <param name="zipEntryName">The zip entry name</param>
/// <returns>The zipped file as a byte array</returns>
public static byte[] ZipBytes(byte[] bytes, string zipEntryName)
{
    using (var zipped = new MemoryStream())
    {
        // Block on the async implementation, then snapshot what it wrote
        ZipBytesAsync(zipped, bytes, zipEntryName, null).ConfigureAwait(false).GetAwaiter().GetResult();
        return zipped.ToArray();
    }
}
304 
/// <summary>
/// Zips the given bytes as a single entry into the target stream
/// </summary>
/// <param name="target">The target stream</param>
/// <param name="data">The file contents in bytes to be zipped</param>
/// <param name="zipEntryName">The zip entry name</param>
/// <param name="mode">The archive mode, forwarded to the multi entry overload</param>
/// <param name="compressionLevel">The desired compression level, forwarded to the multi entry overload</param>
/// <returns>A task that completes once the entry has been written</returns>
public static async Task ZipBytesAsync(Stream target, byte[] data, string zipEntryName, ZipArchiveMode? mode = null,
    CompressionLevel? compressionLevel = null)
{
    // Wrap the single payload so the multi entry overload can do the work
    var singleEntry = new[] { new KeyValuePair<byte[], string>(data, zipEntryName) };
    await ZipBytesAsync(target, singleEntry, mode, compressionLevel).ConfigureAwait(false);
}
319 
/// <summary>
/// Zips each bytes/name pair as an entry into the target stream, which is left open afterwards
/// </summary>
/// <param name="target">The target stream</param>
/// <param name="data">Pairs whose key is the entry contents and whose value is the entry name</param>
/// <param name="mode">The archive mode, <see cref="ZipArchiveMode.Create"/> when not specified</param>
/// <param name="compressionLevel">The desired compression level, <see cref="CompressionLevel.SmallestSize"/> when not specified</param>
/// <returns>A task that completes once all entries have been written</returns>
public static async Task ZipBytesAsync(Stream target, IEnumerable<KeyValuePair<byte[], string>> data, ZipArchiveMode? mode = null,
    CompressionLevel? compressionLevel = null)
{
    var level = compressionLevel ?? CompressionLevel.SmallestSize;
    var archiveMode = mode ?? ZipArchiveMode.Create;

    // 'true' keeps the target stream open once the archive is disposed
    using (var archive = new ZipArchive(target, archiveMode, true))
    {
        foreach (var kvp in data)
        {
            using (var entryStream = archive.CreateEntry(kvp.Value, level).Open())
            {
                await entryStream.WriteAsync(kvp.Key).ConfigureAwait(false);
            }
        }
    }
}
340 
/// <summary>
/// Zips each name/stream pair as an entry into the zip file at the target path
/// </summary>
/// <param name="target">The path of the zip file to write</param>
/// <param name="data">Pairs whose key is the entry name and whose value is the stream holding the entry contents</param>
/// <param name="mode">The archive mode; in update mode an existing file is opened for read/write, otherwise the file is created anew</param>
/// <param name="compressionLevel">The desired compression level</param>
/// <returns>A task that completes once all entries have been written</returns>
public static async Task ZipStreamsAsync(string target, IEnumerable<KeyValuePair<string, Stream>> data, ZipArchiveMode? mode = null,
    CompressionLevel? compressionLevel = null)
{
    FileStream output;
    if (mode == ZipArchiveMode.Update)
    {
        // Updating needs to read what is already there
        output = new FileStream(target, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
    }
    else
    {
        output = new FileStream(target, FileMode.Create, FileAccess.Write, FileShare.None);
    }

    using (output)
    {
        await ZipStreamsAsync(output, data, mode, compressionLevel).ConfigureAwait(false);
    }
}
357 
/// <summary>
/// Zips each name/stream pair as an entry into the target stream
/// </summary>
/// <param name="target">The target stream</param>
/// <param name="data">Pairs whose key is the entry name and whose value is the stream holding the entry contents</param>
/// <param name="mode">The archive mode, <see cref="ZipArchiveMode.Create"/> when not specified</param>
/// <param name="compressionLevel">The desired compression level, <see cref="CompressionLevel.SmallestSize"/> when not specified</param>
/// <param name="leaveStreamOpen">True to leave the target stream open</param>
/// <returns>A task that completes once all entries have been written</returns>
public static async Task ZipStreamsAsync(Stream target, IEnumerable<KeyValuePair<string, Stream>> data, ZipArchiveMode? mode = null,
    CompressionLevel? compressionLevel = null, bool leaveStreamOpen = false)
{
    var level = compressionLevel ?? CompressionLevel.SmallestSize;

    using (var archive = new ZipArchive(target, mode ?? ZipArchiveMode.Create, leaveStreamOpen))
    {
        var replaceExisting = archive.Mode == ZipArchiveMode.Update;
        foreach (var kvp in data)
        {
            if (replaceExisting)
            {
                // When updating, drop any entry already stored under this name first
                archive.GetEntry(kvp.Key)?.Delete();
            }

            using (var entryStream = archive.CreateEntry(kvp.Key, level).Open())
            {
                await kvp.Value.CopyToAsync(entryStream).ConfigureAwait(false);
            }
        }
    }
}
384 
/// <summary>
/// Extracts a .gz file to disk
/// </summary>
/// <param name="gzipFileName">Path of the gzip file to read</param>
/// <param name="targetDirectory">Directory receiving the extracted file, named after the gzip file without its last extension</param>
/// <returns>The path of the extracted file</returns>
public static string UnGZip(string gzipFileName, string targetDirectory)
{
    var extractedPath = Path.Combine(targetDirectory, Path.GetFileNameWithoutExtension(gzipFileName));

    // 4K copy buffer
    var copyBuffer = new byte[4096];

    using Stream compressed = new FileStream(gzipFileName, FileMode.Open, FileAccess.Read);
    using var inflated = new GZipInputStream(compressed);
    using var extracted = File.Create(extractedPath);
    StreamUtils.Copy(inflated, extracted, copyBuffer);

    return extractedPath;
}
403 
/// <summary>
/// Compress a given file and optionally delete the original file. The zip is written next to the
/// original, using the original path with its extension replaced by ".zip".
/// </summary>
/// <param name="textPath">Path of the original file</param>
/// <param name="zipEntryName">The name of the entry inside the zip file</param>
/// <param name="deleteOriginal">Boolean flag to delete the original file after completion</param>
/// <returns>String path for the new zip file</returns>
public static string Zip(string textPath, string zipEntryName, bool deleteOriginal = true)
{
    // Only swap the real extension: a plain text replace would also rewrite ".csv"/".txt" found in
    // directory names and leave any other extension untouched, making destination equal to source.
    var zipPath = Path.ChangeExtension(textPath, ".zip");
    if (string.Equals(zipPath, textPath, StringComparison.OrdinalIgnoreCase))
    {
        // Never write the zip over the file being compressed
        zipPath = textPath + ".zip";
    }

    Zip(textPath, zipPath, zipEntryName, deleteOriginal);
    return zipPath;
}
417 
/// <summary>
/// Compresses the source file into a single entry of a new zip file.
/// </summary>
/// <remarks>Exceptions are logged and not rethrown.</remarks>
/// <param name="source">The source file to be compressed</param>
/// <param name="destination">The destination zip file path</param>
/// <param name="zipEntryName">The zip entry name for the file</param>
/// <param name="deleteOriginal">True to delete the source file upon completion</param>
public static void Zip(string source, string destination, string zipEntryName, bool deleteOriginal)
{
    try
    {
        using (var archive = new ZipOutputStream(File.Create(destination)))
        {
            archive.PutNextEntry(new ZipEntry(zipEntryName));

            // Stream the source into the entry in 4K pieces
            using var input = File.OpenRead(source);
            var chunk = new byte[4096];
            int read;
            while ((read = input.Read(chunk, 0, chunk.Length)) > 0)
            {
                archive.Write(chunk, 0, read);
            }
        }

        // The zip is closed at this point, so the original can go
        if (deleteOriginal)
        {
            File.Delete(source);
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
}
459 
/// <summary>
/// Compress a given file, naming the zip entry after the file name of the original.
/// </summary>
/// <param name="textPath">Path of the original file</param>
/// <param name="deleteOriginal">Boolean flag to delete the original file after completion</param>
/// <returns>String path for the new zip file</returns>
public static string Zip(string textPath, bool deleteOriginal = true)
    => Zip(textPath, Path.GetFileName(textPath), deleteOriginal);
470 
/// <summary>
/// Writes the given text as a single entry of a new zip file
/// </summary>
/// <param name="data">Data to write to zip</param>
/// <param name="zipPath">Path to write to</param>
/// <param name="zipEntry">Entry to save the data as</param>
public static void Zip(string data, string zipPath, string zipEntry)
{
    using var archive = new ZipOutputStream(File.Create(zipPath));
    archive.PutNextEntry(new ZipEntry(zipEntry));

    // Push the encoded text into the entry in 4K pieces
    var chunk = new byte[4096];
    using var encoded = new MemoryStream(Encoding.Default.GetBytes(data));
    int read;
    while ((read = encoded.Read(chunk, 0, chunk.Length)) > 0)
    {
        archive.Write(chunk, 0, read);
    }
}
496 
/// <summary>
/// Zips the specified directory, preserving folder structure. An existing destination file is deleted first.
/// </summary>
/// <param name="directory">The directory to be zipped</param>
/// <param name="destination">The output zip file destination</param>
/// <param name="includeRootInZip">True to include the root 'directory' in the zip, false otherwise</param>
/// <returns>True on a successful zip, false if an exception was raised (it is logged)</returns>
public static bool ZipDirectory(string directory, string destination, bool includeRootInZip = true)
{
    var zipped = false;
    try
    {
        if (File.Exists(destination))
        {
            File.Delete(destination);
        }

        // Entry names go through the path encoder so they use forward slashes
        var entryNameEncoding = new PathEncoder();
        System.IO.Compression.ZipFile.CreateFromDirectory(directory, destination, CompressionLevel.Fastest, includeRootInZip, entryNameEncoding);
        zipped = true;
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
    return zipped;
}
518 
/// <summary>
/// UTF8 encoding that turns backslashes into forward slashes before encoding,
/// so paths are stored in linux format for cross platform compatibility
/// </summary>
private class PathEncoder : UTF8Encoding
{
    /// <summary>
    /// Encodes the string after replacing every backslash with a forward slash
    /// </summary>
    public override byte[] GetBytes(string s) => base.GetBytes(s.Replace("\\", "/"));
}
530 
/// <summary>
/// Unzips the specified zip file to the specified directory
/// </summary>
/// <param name="zip">The zip to be unzipped</param>
/// <param name="directory">The directory to place the unzipped files</param>
/// <param name="overwrite">Flag specifying whether or not to overwrite existing files</param>
/// <returns>True on success; false when the zip does not exist or an exception was raised (it is logged)</returns>
public static bool Unzip(string zip, string directory, bool overwrite = false)
{
    if (!File.Exists(zip))
    {
        return false;
    }

    try
    {
        if (overwrite)
        {
            using var archive = new ZipArchive(File.OpenRead(zip));

            // Entries without a name are directories and are skipped
            foreach (var entry in archive.Entries.Where(candidate => !string.IsNullOrEmpty(candidate.Name)))
            {
                var targetPath = Path.Combine(directory, entry.FullName);
                if (IsLinux)
                {
                    targetPath = targetPath.Replace(@"\", "/");
                }

                var target = new FileInfo(targetPath);
                if (!target.Directory.Exists)
                {
                    target.Directory.Create();
                }
                entry.ExtractToFile(target.FullName, true);
            }
        }
        else
        {
            System.IO.Compression.ZipFile.ExtractToDirectory(zip, directory);
        }

        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
575 
/// <summary>
/// Zips all files specified to a new zip at the destination path. Each entry is named after the
/// file name only; missing files are traced and skipped.
/// </summary>
/// <remarks>Exceptions are logged and not rethrown.</remarks>
/// <param name="destination">The path of the zip file to create</param>
/// <param name="files">The paths of the files to add</param>
public static void ZipFiles(string destination, IEnumerable<string> files)
{
    try
    {
        using var archive = new ZipOutputStream(File.Create(destination));
        var copyBuffer = new byte[4096];

        foreach (var file in files)
        {
            if (File.Exists(file))
            {
                archive.PutNextEntry(new ZipEntry(Path.GetFileName(file)));
                using var input = File.OpenRead(file);
                StreamUtils.Copy(input, archive, copyBuffer);
            }
            else
            {
                Log.Trace($"ZipFiles(): File does not exist: {file}");
            }
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
}
608 
/// <summary>
/// Opens a stream reader over the first entry of a local zip file.
/// Important: the caller must call Dispose() on the returned ZipFile instance.
/// </summary>
/// <param name="filename">Location of the original zip file</param>
/// <param name="zip">The ZipFile instance to be returned to the caller</param>
/// <returns>Stream reader of the first file contents in the zip file</returns>
public static StreamReader Unzip(string filename, out ZipFile zip) => Unzip(filename, null, out zip);
620 
/// <summary>
/// Opens a stream reader over one entry of a local zip file.
/// Important: the caller must call Dispose() on the returned ZipFile instance.
/// </summary>
/// <remarks>
/// Exceptions are logged and not rethrown. When opening the entry fails, the zip instance is disposed here.
/// </remarks>
/// <param name="filename">Location of the original zip file</param>
/// <param name="zipEntryName">The zip entry name to open a reader for, compared ignoring case. Specify null to access the first entry</param>
/// <param name="zip">The ZipFile instance to be returned to the caller; null when the file does not exist</param>
/// <returns>Stream reader of the selected entry, or null when the file or entry is missing or an error occurred</returns>
public static StreamReader Unzip(string filename, string zipEntryName, out ZipFile zip)
{
    zip = null;
    StreamReader entryReader = null;

    try
    {
        if (!File.Exists(filename))
        {
            Log.Error($"Data.UnZip(2): File doesn\'t exist: {filename}");
            return null;
        }

        try
        {
            zip = new ZipFile(filename);

            // A null entry name selects whichever entry comes first
            var match = zip.FirstOrDefault(candidate =>
                zipEntryName == null || string.Equals(candidate.FileName, zipEntryName, StringComparison.OrdinalIgnoreCase));
            if (match == null)
            {
                // Unable to locate zip entry
                return null;
            }

            entryReader = new StreamReader(match.OpenReader());
        }
        catch (Exception err)
        {
            Log.Error(err, "Inner try/catch");
            zip?.Dispose();
            entryReader?.Close();
        }
    }
    catch (Exception err)
    {
        Log.Error(err, "File: " + filename);
    }

    return entryReader;
}
668 
/// <summary>
/// Streams the unzipped file as key value pairs of entry name to the lines of that entry.
/// </summary>
/// <remarks>
/// The result comes from <see cref="ReadLinesImpl"/>, which is an iterator: the zip is only opened once
/// the returned enumerable is enumerated, so the try/catch here only guards creating the iterator and
/// read failures surface to the enumerating caller.
/// </remarks>
/// <param name="filename">The zip file to stream</param>
/// <returns>The streamed zip contents, or an empty enumerable when the file does not exist</returns>
public static IEnumerable<KeyValuePair<string, List<string>>> Unzip(string filename)
{
    if (File.Exists(filename))
    {
        try
        {
            return ReadLinesImpl(filename);
        }
        catch (Exception err)
        {
            Log.Error(err);
        }
    }
    else
    {
        Log.Error($"Compression.Unzip(): File does not exist: {filename}");
    }

    return Enumerable.Empty<KeyValuePair<string, List<string>>>();
}
698 
/// <summary>
/// Lazily unzips the specified stream, one entry per enumeration step
/// </summary>
/// <param name="stream">The zipped stream to be read</param>
/// <returns>An enumerable whose elements are zip entry key value pairs with
/// a key of the zip entry name and the value of the zip entry's file lines</returns>
public static IEnumerable<KeyValuePair<string, List<string>>> Unzip(Stream stream)
{
    // Disposed when the enumeration completes or the enumerator is disposed
    using var archive = ZipFile.Read(stream);
    foreach (var zipEntry in archive)
    {
        var lines = ReadZipEntry(zipEntry);
        yield return new KeyValuePair<string, List<string>>(zipEntry.FileName, lines);
    }
}
715 
/// <summary>
/// Reads every line from the first zip entry in the specified zip file
/// </summary>
/// <param name="filename">The zip file path to read</param>
/// <returns>The lines of the first entry; an empty list when the file does not exist
/// or an exception was raised (it is logged)</returns>
public static List<string> ReadLines(string filename)
{
    if (File.Exists(filename))
    {
        try
        {
            // Only the first entry is produced, so exactly one element is expected
            var firstEntry = ReadLinesImpl(filename, firstEntryOnly: true).Single();
            return firstEntry.Value;
        }
        catch (Exception err)
        {
            Log.Error(err);
        }
    }
    else
    {
        Log.Error($"Compression.ReadFirstZipEntry(): File does not exist: {filename}");
    }

    return new List<string>();
}
739 
/// <summary>
/// Iterator that opens the zip file and yields each entry name together with the lines of that entry
/// </summary>
/// <param name="filename">The zip file path to read</param>
/// <param name="firstEntryOnly">True to stop after the first entry has been yielded</param>
/// <returns>Key value pairs of entry name to entry lines</returns>
private static IEnumerable<KeyValuePair<string, List<string>>> ReadLinesImpl(string filename, bool firstEntryOnly = false)
{
    using var archive = ZipFile.Read(filename);

    var index = 0;
    while (index < archive.Count)
    {
        var current = archive[index++];
        yield return new KeyValuePair<string, List<string>>(current.FileName, ReadZipEntry(current));

        if (firstEntryOnly)
        {
            yield break;
        }
    }
}
755 
/// <summary>
/// Reads the whole zip entry as text and returns its lines
/// </summary>
/// <param name="entry">The zip entry to read</param>
/// <returns>The lines of the entry, in order</returns>
private static List<string> ReadZipEntry(Ionic.Zip.ZipEntry entry)
{
    var lines = new List<string>();
    using (var reader = new StreamReader(entry.OpenReader()))
    {
        // ReadLine returns null once the end of the entry is reached
        string current;
        while ((current = reader.ReadLine()) != null)
        {
            lines.Add(current);
        }
    }
    return lines;
}
768 
/// <summary>
/// Unzips the first entry of the given zip stream into memory and returns a stream reader over it.
/// </summary>
/// <remarks>Exceptions are logged and not rethrown.</remarks>
/// <param name="zipstream">The stream holding the zip data</param>
/// <returns>A stream reader over the contents of the first entry, or null when the zip
/// has no entries or an error occurred</returns>
public static StreamReader UnzipStreamToStreamReader(Stream zipstream)
{
    StreamReader reader = null;
    try
    {
        var file = new MemoryStream();

        using (var zipStream = new ZipInputStream(zipstream))
        {
            var entry = zipStream.GetNextEntry();
            if (entry == null)
            {
                Log.Error("Compression.UnzipStreamToStreamReader(): Zip stream contains no entries");
                return null;
            }

            // Copy until the entry is exhausted: a single read may return fewer bytes than requested,
            // and the declared entry size is not always known up front.
            zipStream.CopyTo(file);
        }

        // Rewind so the reader starts at the beginning of the entry contents
        file.Position = 0;
        reader = new StreamReader(file);
    }
    catch (Exception err)
    {
        Log.Error(err);
    }

    return reader;
} // End UnZip
802 
/// <summary>
/// Reads a zip file from the given stream and opens one of its entries as a stream
/// </summary>
/// <remarks>Reading the zip itself is not guarded; exceptions raised while locating or opening the entry are logged.</remarks>
/// <param name="zipstream">The stream holding the zip data</param>
/// <param name="zipFile">The ZipFile instance read from the stream</param>
/// <param name="entryName">The entry to open; the first entry when null or empty</param>
/// <returns>The entry contents as a stream, or null when the entry cannot be found or opened</returns>
public static Stream UnzipStream(Stream zipstream, out ZipFile zipFile, string entryName = null)
{
    zipFile = ZipFile.Read(zipstream);

    try
    {
        Ionic.Zip.ZipEntry selected;
        if (!string.IsNullOrEmpty(entryName))
        {
            // A specific entry was requested, bail out if the zip does not have it
            if (!zipFile.ContainsEntry(entryName))
            {
                return null;
            }
            selected = zipFile[entryName];
        }
        else
        {
            selected = zipFile.Entries.FirstOrDefault();
        }

        return selected?.OpenReader();
    }
    catch (Exception err)
    {
        Log.Error(err);
    }

    return null;
} // End UnZip
840 
/// <summary>
/// Unzips the zip held in the given byte array into the output folder and returns the created file names.
/// </summary>
/// <param name="zipData">A byte array containing the zip</param>
/// <param name="outputFolder">The target output folder</param>
/// <returns>List of unzipped file names</returns>
public static List<string> UnzipToFolder(byte[] zipData, string outputFolder)
    => UnzipToFolder(new MemoryStream(zipData), outputFolder);
852 
/// <summary>
/// Unzips a local zip file into the directory that contains it and returns the created file names
/// </summary>
/// <param name="zipFile">Location of the zip on the HD</param>
/// <returns>List of unzipped file names</returns>
public static List<string> UnzipToFolder(string zipFile)
{
    // Resolve the containing folder before the file is opened
    var parentFolder = Path.GetDirectoryName(zipFile);
    return UnzipToFolder(File.OpenRead(zipFile), parentFolder);
}
864 
/// <summary>
/// Unzip the given data stream into the target output folder and return the created file names
/// </summary>
/// <remarks>
/// On failure the output folder and the files handled so far are logged and the exception is rethrown.
/// Once the zip has been opened, the data stream is closed together with it when this method returns.
/// </remarks>
/// <param name="dataStream">The zip data stream</param>
/// <param name="outFolder">The target output folder; the current directory is used when null or empty</param>
/// <returns>List of unzipped file names</returns>
private static List<string> UnzipToFolder(Stream dataStream, string outFolder)
{
    var files = new List<string>();
    if (string.IsNullOrEmpty(outFolder))
    {
        outFolder = Directory.GetCurrentDirectory();
    }
    ICSharpCode.SharpZipLib.Zip.ZipFile zf = null;

    try
    {
        zf = new ICSharpCode.SharpZipLib.Zip.ZipFile(dataStream);

        // One copy buffer reused for every entry
        var buffer = new byte[4096];

        foreach (ZipEntry zipEntry in zf)
        {
            // Ignore directories
            if (!zipEntry.IsFile) continue;

            // Dispose the per-entry input stream once the entry has been copied
            using (var zipStream = zf.GetInputStream(zipEntry))
            {
                // NOTE(review): the entry name is combined into the path unchecked; an entry name
                // containing ".." segments could resolve outside of outFolder. Confirm inputs are trusted.
                var fullZipToPath = Path.Combine(outFolder, zipEntry.Name);

                var targetFile = new FileInfo(fullZipToPath);
                if (targetFile.Directory != null && !targetFile.Directory.Exists)
                {
                    targetFile.Directory.Create();
                }

                // Save the file name for the caller (and for the failure log below)
                files.Add(fullZipToPath);

                // Copy the data in buffer chunks
                using (var streamWriter = File.Create(fullZipToPath))
                {
                    StreamUtils.Copy(zipStream, streamWriter, buffer);
                }
            }
        }
    }
    catch
    {
        // Catch the exception just to log some information about the zip file
        Log.Error($"Compression.UnzipToFolder(): Failure: outFolder: {outFolder} - files: {string.Join(",", files)}");
        throw;
    }
    finally
    {
        if (zf != null)
        {
            zf.IsStreamOwner = true; // Makes close also shut the underlying stream
            zf.Close(); // Ensure we release resources
        }
    }
    return files;
} // End UnZip
928 
/// <summary>
/// Extracts all files from a tar archive and copies them to a destination folder.
/// </summary>
/// <param name="source">The source tar file.</param>
/// <param name="destination">The destination folder to extract the files to.</param>
public static void UnTarFiles(string source, string destination)
{
    // 'using' releases the archive and the file handle even when extraction throws
    using (var inStream = File.OpenRead(source))
    using (var tarArchive = TarArchive.CreateInputTarArchive(inStream))
    {
        tarArchive.ExtractContents(destination);
    }
}
942 
/// <summary>
/// Extract tar.gz files to disk
/// </summary>
/// <param name="source">Tar.gz source file</param>
/// <param name="destination">Location folder to unzip to</param>
public static void UnTarGzFiles(string source, string destination)
{
    // 'using' releases the archive, the gzip stream and the file handle (in that order)
    // even when extraction throws
    using (var inStream = File.OpenRead(source))
    using (var gzipStream = new GZipInputStream(inStream))
    using (var tarArchive = TarArchive.CreateInputTarArchive(gzipStream))
    {
        tarArchive.ExtractContents(destination);
    }
}
958 
/// <summary>
/// Lazily walks the entries of a tar stream, yielding the name and raw bytes of every non-directory entry.
/// </summary>
/// <param name="stream">The input tar stream</param>
/// <param name="isTarGz">True when the stream must first be read through a gzip decompression stream</param>
/// <returns>An enumerable of key value pairs: entry name as key, entry contents as value</returns>
public static IEnumerable<KeyValuePair<string, byte[]>> UnTar(Stream stream, bool isTarGz)
{
    // Pick the stream the tar reader will consume: the raw input, or the input wrapped for gzip decompression.
    Stream tarSource = stream;
    if (isTarGz)
    {
        tarSource = new GZipInputStream(stream);
    }

    using (var reader = new TarInputStream(tarSource))
    {
        for (var current = reader.GetNextEntry(); current != null; current = reader.GetNextEntry())
        {
            // Directory entries carry no file data, so they are not yielded.
            if (current.IsDirectory)
            {
                continue;
            }

            using (var contents = new MemoryStream())
            {
                reader.CopyEntryContents(contents);
                yield return new KeyValuePair<string, byte[]>(current.Name, contents.ToArray());
            }
        }
    }
}
982 
/// <summary>
/// Enumerates the file entries of a tar archive on disk, yielding each entry name together with its contents.
/// </summary>
/// <param name="source">Path of the tar file; a path ending in "tar.gz" is read through a gzip decompression stream</param>
/// <returns>An enumerable of key value pairs with the entry name as key and the entry bytes as value; directory entries are skipped</returns>
public static IEnumerable<KeyValuePair<string, byte[]>> UnTar(string source)
{
    // Only a path ending in "tar.gz" (ordinal, case-sensitive comparison) is treated as gzip compressed.
    var isTarGz = source.EndsWith("tar.gz", StringComparison.Ordinal);

    using (var file = File.OpenRead(source))
    {
        // Delegate to the stream overload: it builds a single tar reader inside a 'using',
        // so the reader (and the gzip wrapper, when present) is released even if the caller
        // stops enumerating early or an entry fails to read.
        foreach (var entry in UnTar(file, isTarGz))
        {
            yield return entry;
        }
    }
}
1017 
/// <summary>
/// Opens the zip file at the given path and runs the archive test on it.
/// </summary>
/// <param name="path">Path to the zip file</param>
/// <returns>The result of the archive test: true if the archive tests ok; false otherwise.</returns>
public static bool ValidateZip(string path)
{
    using (var archive = new ICSharpCode.SharpZipLib.Zip.ZipFile(path))
    {
        // The flag is forwarded as the single argument of TestArchive.
        const bool testData = true;
        var archiveIsValid = archive.TestArchive(testData);
        return archiveIsValid;
    }
}
1030 
/// <summary>
/// Returns the entry file names contained in a zip file
/// </summary>
/// <param name="zipFileName">The zip file name</param>
/// <returns>An IEnumerable of entry file names, copied out of the archive before it is closed</returns>
public static IEnumerable<string> GetZipEntryFileNames(string zipFileName)
{
    using (var zip = ZipFile.Read(zipFileName))
    {
        // Snapshot the names while the archive is still open, so the returned
        // sequence does not depend on the state of the already disposed ZipFile.
        return zip.EntryFileNames.ToList();
    }
}
1043 
/// <summary>
/// Return the entry file names contained in a zip file
/// </summary>
/// <param name="zipFileStream">Stream to the file</param>
/// <returns>IEnumerable of entry file names, copied out of the archive before it is closed</returns>
public static IEnumerable<string> GetZipEntryFileNames(Stream zipFileStream)
{
    using (var zip = ZipFile.Read(zipFileStream))
    {
        // Snapshot the names while the archive is still open, so the returned
        // sequence does not depend on the state of the already disposed ZipFile.
        return zip.EntryFileNames.ToList();
    }
}
1056 
/// <summary>
/// Extracts a 7-zip archive to disk, using the 7-zip CLI utility
/// </summary>
/// <param name="inputFile">Path to the 7z file</param>
/// <param name="outputDirectory">Directory to output contents of 7z</param>
/// <param name="execTimeout">Timeout in milliseconds for how long we should wait for the extraction to complete</param>
/// <exception cref="TimeoutException">The extraction did not complete within <paramref name="execTimeout"/> milliseconds</exception>
/// <exception cref="Exception">The 7-zip process finished with an exit code other than 0</exception>
public static void Extract7ZipArchive(string inputFile, string outputDirectory, int execTimeout = 60000)
{
    var zipper = IsLinux ? "7z" : "C:/Program Files/7-Zip/7z.exe";

    // NOTE(review): both paths are appended to the argument string unquoted, so a path
    // containing spaces will be split into several arguments - confirm callers never pass one.
    var psi = new ProcessStartInfo(zipper, " e " + inputFile + " -o" + outputDirectory)
    {
        CreateNoWindow = true,
        WindowStyle = ProcessWindowStyle.Hidden,
        UseShellExecute = false,
        RedirectStandardOutput = false
    };

    // Dispose the Process object so its handle is released on every exit path.
    using (var process = new Process { StartInfo = psi })
    {
        process.Start();

        // Process.WaitForExit(int) expects the timeout in milliseconds.
        if (!process.WaitForExit(execTimeout))
        {
            throw new TimeoutException($"Timed out extracting 7Zip archive: {inputFile} ({execTimeout} milliseconds)");
        }

        // Any non-zero code is a failure; negative codes must not be mistaken for success.
        if (process.ExitCode != 0)
        {
            throw new Exception($"Compression.Extract7ZipArchive(): 7Zip exited unsuccessfully (code {process.ExitCode})");
        }
    }
}
1088  }
1089 }