# -*- coding: UTF-8 -*-

# Copyright (c) 2018 The ungoogled-chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Module for the downloading, checking, and unpacking of necessary files into the source tree
"""

import enum
import urllib.request
import hashlib
from pathlib import Path

from .common import ENCODING, BuildkitError, ExtractorEnum, get_logger
from .extraction import extract_tar_file, extract_with_7z

# Constants


class HashesURLEnum(str, enum.Enum):
    """Enum for supported hash URL schemes"""
    chromium = 'chromium'


# Custom Exceptions


class HashMismatchError(BuildkitError):
    """Exception for computed hashes not matching expected hashes"""


class _UrlRetrieveReportHook: #pylint: disable=too-few-public-methods
    """Hook for urllib.request.urlretrieve to log progress information to console"""

    def __init__(self):
        self._max_len_printed = 0
        self._last_percentage = None

    def __call__(self, block_count, block_size, total_size):
        downloaded_estimate = block_count * block_size
        if total_size > 0:
            percentage = round(downloaded_estimate / total_size, ndigits=3)
            if percentage == self._last_percentage:
                return # Do not needlessly update the console
            self._last_percentage = percentage
            status_line = 'Progress: {:.1%} of {:,d} B'.format(percentage, total_size)
        else:
            # Total size is unknown (urlretrieve reports a size <= 0), so only
            # the downloaded byte count can be shown.
            status_line = 'Progress: {:,d} B of unknown size'.format(downloaded_estimate)
        print('\r' + ' ' * self._max_len_printed, end='')
        self._max_len_printed = len(status_line)
        print('\r' + status_line, end='')
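

# For reference, urllib.request.urlretrieve invokes its reporthook after each
# block as reporthook(block_count, block_size, total_size), passing a
# total_size <= 0 when the server does not report a length; the hook above
# folds those calls into a single console line that is rewritten in place.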


def _download_if_needed(file_path, url, show_progress):
    """
    Downloads a file from url to the specified path file_path if necessary.

    If show_progress is True, download progress is printed to the console.
    """
    if file_path.exists():
        get_logger().info('%s already exists. Skipping download.', file_path)
    else:
        get_logger().info('Downloading %s ...', file_path)
        reporthook = None
        if show_progress:
            reporthook = _UrlRetrieveReportHook()
        urllib.request.urlretrieve(url, str(file_path), reporthook=reporthook)
        if show_progress:
            print()
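

# Illustrative standalone use of _download_if_needed (the path and URL are
# placeholders, not values used by buildkit):
#   _download_if_needed(
#       Path('downloads/example.tar.xz'),
#       'https://example.com/example.tar.xz',
#       show_progress=True)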


def _chromium_hashes_generator(hashes_path):
    """Generator of (hash_name, hash_hex) pairs parsed from a Chromium-style hashes file"""
    with hashes_path.open(encoding=ENCODING) as hashes_file:
        hash_lines = hashes_file.read().splitlines()
    for hash_name, hash_hex, _ in map(lambda x: x.lower().split(' '), hash_lines):
        if hash_name in hashlib.algorithms_available:
            yield hash_name, hash_hex
        else:
            get_logger().warning('Skipping unknown hash algorithm: %s', hash_name)
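

# For illustration only: each line of the hashes file is assumed to hold the
# algorithm name, the hex digest, and the file name, e.g.
#   sha256 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 example.tar.xz
# Lines whose algorithm is not in hashlib.algorithms_available are skipped.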


def _downloads_iter(config_bundle):
    """Iterator for the downloads ordered by output path"""
    return sorted(
        map(lambda x: (x, config_bundle.downloads[x]), config_bundle.downloads),
        key=(lambda x: str(Path(x[1].output_path))))


def _get_hash_pairs(download_properties, cache_dir):
    """Generator of (hash_name, hash_hex) for the given download"""
    for entry_type, entry_value in download_properties.hashes.items():
        if entry_type == 'hash_url':
            hash_processor, hash_filename, _ = entry_value
            if hash_processor == 'chromium':
                yield from _chromium_hashes_generator(cache_dir / hash_filename)
            else:
                raise ValueError('Unknown hash_url processor: %s' % hash_processor)
        else:
            yield entry_type, entry_value
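

# Illustrative shapes of download_properties.hashes handled above (values are
# placeholders): a plain entry such as {'sha256': '<hex digest>'} is yielded
# as-is, while {'hash_url': ('chromium', 'example.hashes',
# 'https://example.com/example.hashes')} causes the digests to be parsed from
# the previously downloaded hashes file via _chromium_hashes_generator.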


def retrieve_downloads(config_bundle, cache_dir, show_progress, disable_ssl_verification=False):
    """
    Retrieve downloads into the downloads cache.

    config_bundle is the config.ConfigBundle to retrieve downloads for.
    cache_dir is the pathlib.Path to the downloads cache.
    show_progress is a boolean indicating if download progress is printed to the console.
    disable_ssl_verification is a boolean indicating if certificate verification
        should be disabled for downloads using HTTPS.

    Raises FileNotFoundError if the downloads path does not exist.
    Raises NotADirectoryError if the downloads path is not a directory.
    """
    if not cache_dir.exists():
        raise FileNotFoundError(cache_dir)
    if not cache_dir.is_dir():
        raise NotADirectoryError(cache_dir)
    if disable_ssl_verification:
        import ssl
        # TODO: Remove this or properly implement disabling SSL certificate verification
        orig_https_context = ssl._create_default_https_context #pylint: disable=protected-access
        ssl._create_default_https_context = ssl._create_unverified_context #pylint: disable=protected-access
    try:
        for download_name, download_properties in _downloads_iter(config_bundle):
            get_logger().info('Downloading "%s" to "%s" ...', download_name,
                              download_properties.download_filename)
            download_path = cache_dir / download_properties.download_filename
            _download_if_needed(download_path, download_properties.url, show_progress)
            if download_properties.has_hash_url():
                get_logger().info('Downloading hashes for "%s"', download_name)
                _, hash_filename, hash_url = download_properties.hashes['hash_url']
                _download_if_needed(cache_dir / hash_filename, hash_url, show_progress)
    finally:
        # Try to reduce damage of hack by reverting original HTTPS context ASAP
        if disable_ssl_verification:
            ssl._create_default_https_context = orig_https_context #pylint: disable=protected-access


def check_downloads(config_bundle, cache_dir):
    """
    Check integrity of the downloads cache.

    config_bundle is the config.ConfigBundle to check downloads for.
    cache_dir is the pathlib.Path to the downloads cache.

    Raises HashMismatchError when the computed and expected hashes do not match.
    """
    for download_name, download_properties in _downloads_iter(config_bundle):
        get_logger().info('Verifying hashes for "%s" ...', download_name)
        download_path = cache_dir / download_properties.download_filename
        with download_path.open('rb') as file_obj:
            archive_data = file_obj.read()
        for hash_name, hash_hex in _get_hash_pairs(download_properties, cache_dir):
            get_logger().debug('Verifying %s hash...', hash_name)
            hasher = hashlib.new(hash_name, data=archive_data)
            if hasher.hexdigest().lower() != hash_hex.lower():
                raise HashMismatchError(download_path)
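

# Illustrative handling of a verification failure (the surrounding caller is an
# assumption, not part of this module):
#   try:
#       check_downloads(bundle, cache_dir)
#   except HashMismatchError as exc:
#       get_logger().error('Computed and expected hashes do not match for %s', exc)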


def unpack_downloads(config_bundle, cache_dir, output_dir, extractors=None):
    """
    Unpack downloads in the downloads cache to output_dir. Assumes all downloads are retrieved.

    config_bundle is the config.ConfigBundle to unpack downloads for.
    cache_dir is the pathlib.Path directory containing the downloads cache.
    output_dir is the pathlib.Path directory to unpack the downloads to.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. Defaults to 'tar' for tar, and '_use_registry' for 7-Zip.

    May raise undetermined exceptions during archive unpacking.
    """
    for download_name, download_properties in _downloads_iter(config_bundle):
        download_path = cache_dir / download_properties.download_filename
        get_logger().info('Unpacking "%s" to %s ...', download_name,
                          download_properties.output_path)
        extractor_name = download_properties.extractor or ExtractorEnum.TAR
        if extractor_name == ExtractorEnum.SEVENZIP:
            extractor_func = extract_with_7z
        elif extractor_name == ExtractorEnum.TAR:
            extractor_func = extract_tar_file
        else:
            raise NotImplementedError(extractor_name)
        if download_properties.strip_leading_dirs is None:
            strip_leading_dirs_path = None
        else:
            strip_leading_dirs_path = Path(download_properties.strip_leading_dirs)
        extractor_func(
            archive_path=download_path,
            output_dir=output_dir / Path(download_properties.output_path),
            relative_to=strip_leading_dirs_path,
            extractors=extractors)
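

# Typical call order, sketched with placeholder arguments (`bundle` is a
# config.ConfigBundle and the paths are examples, not defaults of this module):
#   retrieve_downloads(bundle, Path('downloads'), show_progress=True)
#   check_downloads(bundle, Path('downloads'))
#   unpack_downloads(bundle, Path('downloads'), Path('build/src'))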