123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346 |
- # -*- coding: UTF-8 -*-
- # Copyright (c) 2019 The ungoogled-chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- """
- Archive extraction utilities
- """
- import os
- import shutil
- import subprocess
- import tarfile
- from pathlib import Path, PurePosixPath
- from _common import (USE_REGISTRY, PlatformEnum, ExtractorEnum, get_logger, get_running_platform)
- from prune_binaries import CONTINGENT_PATHS
# Default way to locate each supported extractor: either a command name that is
# searched for, or the USE_REGISTRY sentinel meaning "look it up in the Windows Registry".
DEFAULT_EXTRACTORS = dict((
    (ExtractorEnum.SEVENZIP, USE_REGISTRY),
    (ExtractorEnum.TAR, 'tar'),
    (ExtractorEnum.WINRAR, USE_REGISTRY),
))
def _find_7z_by_registry():
    """
    Look up 7-zip's installation directory in the Windows Registry.

    Returns a pathlib.Path pointing at 7z.exe inside that directory. If the file does not
    exist there, an error is logged but the path is still returned.

    Raises OSError (after logging it) if the registry key or value cannot be read.
    """
    import winreg #pylint: disable=import-error, import-outside-toplevel
    app_paths_key = 'SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\7zFM.exe'
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, app_paths_key) as registry_key:
            # QueryValueEx returns (value, value_type); only the value is needed
            install_dir, _value_type = winreg.QueryValueEx(registry_key, 'Path')
    except OSError:
        get_logger().exception('Unable to locate 7-zip from the Windows Registry')
        raise
    exe_path = Path(install_dir) / '7z.exe'
    if exe_path.is_file():
        return exe_path
    get_logger().error('7z.exe not found at path from registry: %s', exe_path)
    return exe_path
def _find_winrar_by_registry():
    """
    Look up WinRAR's installation directory in the Windows Registry.

    Returns a pathlib.Path pointing at WinRAR.exe inside that directory. If the file does
    not exist there, an error is logged but the path is still returned.

    Raises OSError (after logging it) if the registry key or value cannot be read.
    """
    import winreg #pylint: disable=import-error, import-outside-toplevel
    sub_key_winrar = 'SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\WinRAR.exe'
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, sub_key_winrar) as key_handle:
            winrar_dir = winreg.QueryValueEx(key_handle, 'Path')[0]
    except OSError:
        get_logger().exception('Unable to locate WinRAR from the Windows Registry')
        raise
    winrar_path = Path(winrar_dir, 'WinRAR.exe')
    if not winrar_path.is_file():
        get_logger().error('WinRAR.exe not found at path from registry: %s', winrar_path)
    return winrar_path
def _find_extractor_by_cmd(extractor_cmd):
    """
    Resolve an extractor command to a usable binary.

    If extractor_cmd is an existing file it is returned unchanged; otherwise it is searched
    for with shutil.which(). Returns None for an empty command or when nothing is found.
    """
    if extractor_cmd:
        if Path(extractor_cmd).is_file():
            return extractor_cmd
        return shutil.which(extractor_cmd)
    return None
def _process_relative_to(unpack_root, relative_to):
    """
    Emulate a path-stripping transform for extractors that cannot do it themselves.

    Every entry directly inside unpack_root / relative_to is moved into unpack_root, then
    the (now empty) relative_to directory is removed.

    Does nothing when relative_to is None.
    Raises Exception if unpack_root / relative_to is not a directory.
    """
    if relative_to is not None:
        nested_root = unpack_root / relative_to
        if not nested_root.is_dir():
            get_logger().error('Could not find relative_to directory in extracted files: %s',
                               relative_to)
            raise Exception()
        for entry in nested_root.iterdir():
            entry.rename(unpack_root / entry.name)
        # rmdir() only succeeds on an empty directory, i.e. once everything was moved out
        nested_root.rmdir()
def _extract_tar_with_7z(binary, archive_path, output_dir, relative_to, skip_unused):
    """
    Extract a compressed tar archive by piping one 7-zip process into another.

    The first process writes the unpacked outer layer of the archive to stdout ('-so');
    the second reads it from stdin ('-si') as a tar archive ('-ttar') and extracts it into
    output_dir. Afterwards the contents of relative_to are moved up into output_dir.

    binary is the command or path of the 7-zip executable.
    archive_path is the path to the archive to unpack.
    output_dir is a pathlib.Path to the directory to unpack into.
    relative_to is a pathlib.Path to the directory inside the archive whose contents should
        end up directly in output_dir, or None to keep the extracted layout as-is.
    skip_unused is a boolean; if true, an exclusion switch is passed to the second 7-zip
        process for every entry of CONTINGENT_PATHS.

    Raises Exception if output_dir / relative_to already exists or if either 7-zip process
    exits with a non-zero status.
    """
    get_logger().debug('Using 7-zip extractor')
    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise Exception()
    cmd1 = (binary, 'x', str(archive_path), '-so')
    cmd2 = (binary, 'x', '-si', '-aoa', '-ttar', '-o{}'.format(str(output_dir)))
    if skip_unused:
        for cpath in CONTINGENT_PATHS:
            # cpath[:-1] drops the last character of the entry (presumably a trailing '/')
            cmd2 += ('-x!%s/%s' % (str(relative_to), cpath[:-1]), )
    get_logger().debug('7z command line: %s | %s', ' '.join(cmd1), ' '.join(cmd2))

    # The context managers close the pipes and wait for both processes, so the first
    # process is always reaped and its exit status is available afterwards.
    with subprocess.Popen(cmd1, stdout=subprocess.PIPE) as proc1:
        with subprocess.Popen(cmd2, stdin=proc1.stdout, stdout=subprocess.PIPE) as proc2:
            # Drop our copy of the pipe's read end so proc1 is not left writing into a
            # pipe that nobody reads if proc2 exits early
            proc1.stdout.close()
            (stdout_data, stderr_data) = proc2.communicate()

    # A failure in either stage means the extraction cannot be trusted
    failed_status = proc2.returncode or proc1.returncode
    if failed_status != 0:
        get_logger().error('7z commands returned non-zero status: %s', failed_status)
        get_logger().debug('stdout: %s', stdout_data)
        # stderr is not redirected above, so stderr_data is None here
        get_logger().debug('stderr: %s', stderr_data)
        raise Exception()

    _process_relative_to(output_dir, relative_to)
def _extract_tar_with_tar(binary, archive_path, output_dir, relative_to, skip_unused):
    """
    Extract a tar archive by running an external tar binary.

    output_dir is created if it does not exist yet. When skip_unused is true, one
    '--exclude' option is passed for every entry of CONTINGENT_PATHS. After a successful
    run, the contents of relative_to are moved up into output_dir.

    Raises Exception if tar exits with a non-zero status.
    """
    get_logger().debug('Using BSD or GNU tar extractor')
    output_dir.mkdir(exist_ok=True)
    tar_args = [binary, '-xf', str(archive_path), '-C', str(output_dir)]
    if skip_unused:
        tar_args.extend('--exclude=%s/%s' % (str(relative_to), cpath[:-1])
                        for cpath in CONTINGENT_PATHS)
    cmd = tuple(tar_args)
    get_logger().debug('tar command line: %s', ' '.join(cmd))
    completed = subprocess.run(cmd, check=False)
    if completed.returncode != 0:
        get_logger().error('tar command returned %s', completed.returncode)
        raise Exception()

    # GNU tar could strip the leading directory itself via --transform, but bsdtar on
    # macOS cannot, so the move is done here for both
    _process_relative_to(output_dir, relative_to)
def _extract_tar_with_winrar(binary, archive_path, output_dir, relative_to, skip_unused):
    """
    Extract an archive by running an external WinRAR binary.

    binary is the command or path of the WinRAR executable.
    archive_path is the path to the archive to unpack.
    output_dir is a pathlib.Path to the directory to unpack into; it is created if it does
        not exist yet.
    relative_to is a pathlib.Path to the directory inside the archive whose contents should
        end up directly in output_dir, or None to keep the extracted layout as-is.
    skip_unused is a boolean; if true, one '-x' exclusion switch is passed for every entry
        of CONTINGENT_PATHS.

    Raises Exception if WinRAR exits with a non-zero status.
    """
    get_logger().debug('Using WinRAR extractor')
    output_dir.mkdir(exist_ok=True)
    cmd = (binary, 'x', '-o+', str(archive_path), str(output_dir))
    if skip_unused:
        for cpath in CONTINGENT_PATHS:
            # CONTINGENT_PATHS entries use '/' separators; convert them to the native
            # separator, matching the os.sep used to join the prefix
            cmd += ('-x%s%s%s' % (str(relative_to), os.sep, cpath[:-1].replace('/', os.sep)), )
    get_logger().debug('WinRAR command line: %s', ' '.join(cmd))
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('WinRAR command returned %s', result.returncode)
        raise Exception()

    _process_relative_to(output_dir, relative_to)
def _extract_tar_with_python(archive_path, output_dir, relative_to, skip_unused):
    """
    Extract a tar archive with Python's tarfile module, streaming member by member.

    archive_path is a pathlib.Path to the archive; the text after the dot of its suffix is
        used as the compression name in the tarfile stream mode.
    output_dir is a pathlib.Path to the directory to unpack into.
    relative_to is a pathlib.Path to strip from the front of every member path, or None to
        keep member paths unchanged.
    skip_unused is a boolean; if true, members whose name starts with
        '<relative_to>/<entry>' for any entry of CONTINGENT_PATHS are not extracted.

    Symbolic links are skipped when the platform does not appear to support them.
    Any exception raised while handling a member is logged and re-raised.
    """
    get_logger().debug('Using pure Python tar extractor')

    class NoAppendList(list):
        """Hack to workaround memory issues with large tar files"""
        def append(self, obj):
            pass

    # Simple hack to check if symlinks are supported
    try:
        os.symlink('', '')
    except FileNotFoundError:
        # Symlinks probably supported
        symlink_supported = True
    except OSError:
        # Symlinks probably not supported
        get_logger().info('System does not support symlinks. Ignoring them.')
        symlink_supported = False
    except BaseException:
        # Unexpected exception
        get_logger().exception('Unexpected exception during symlink support check.')
        raise
    else:
        # The call is not expected to succeed; if it somehow does, symlinks clearly work
        symlink_supported = True

    # NOTE(review): when relative_to is None this prefix starts with the literal 'None',
    # so skip_unused will not match anything - confirm whether callers combine the two
    skip_prefix = str(relative_to) + '/'

    with tarfile.open(str(archive_path), 'r|%s' % archive_path.suffix[1:]) as tar_file_obj:
        # Prevent tarfile from remembering every member it has iterated over
        tar_file_obj.members = NoAppendList()
        for tarinfo in tar_file_obj:
            try:
                if skip_unused and any(
                        tarinfo.name.startswith(skip_prefix + cpath)
                        for cpath in CONTINGENT_PATHS):
                    continue
                if relative_to is None:
                    destination = output_dir / PurePosixPath(tarinfo.name)
                else:
                    destination = output_dir / PurePosixPath(tarinfo.name).relative_to(relative_to)
                if tarinfo.issym() and not symlink_supported:
                    # In this situation, TarFile.makelink() will try to create a copy of the
                    # target. But this fails because TarFile.members is empty
                    # But if symlinks are not supported, it's safe to assume that symlinks
                    # aren't needed. The only situation where this happens is on Windows.
                    continue
                if tarinfo.islnk():
                    # Derived from TarFile.extract()
                    # Point the hard link at the relocated target, stripping relative_to
                    # only when one was given (same rule as for destination above)
                    link_path = PurePosixPath(tarinfo.linkname)
                    if relative_to is not None:
                        link_path = link_path.relative_to(relative_to)
                    new_target = output_dir / link_path
                    tarinfo._link_target = new_target.as_posix() # pylint: disable=protected-access
                if destination.is_symlink():
                    # Remove a stale symlink so the member is not written through it
                    destination.unlink()
                tar_file_obj._extract_member(tarinfo, str(destination)) # pylint: disable=protected-access
            except BaseException:
                get_logger().exception('Exception thrown for tar member: %s', tarinfo.name)
                raise
def extract_tar_file(archive_path, output_dir, relative_to, skip_unused, extractors=None):
    """
    Extract regular or compressed tar archive into the output directory.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    skip_unused is a boolean that is passed on to the selected extractor; if true, the
        entries of CONTINGENT_PATHS are excluded from extraction.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. Defaults to DEFAULT_EXTRACTORS ('tar' for tar, and USE_REGISTRY
        for 7-Zip and WinRAR).

    On Windows, 7-zip is tried first, then WinRAR; on UNIX the tar binary is tried. If no
    external extractor is found, the pure Python extractor is used.

    Raises NotImplementedError if the running platform is neither Windows nor UNIX.
    """
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS

    current_platform = get_running_platform()
    if current_platform == PlatformEnum.WINDOWS:
        # Try to use 7-zip first
        sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
        if sevenzip_cmd == USE_REGISTRY:
            try:
                sevenzip_cmd = str(_find_7z_by_registry())
            except OSError:
                # Not registered (already logged by the lookup): treat it as "not found"
                # so that the fallbacks below are still tried
                sevenzip_cmd = None
        sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
        if sevenzip_bin is not None:
            _extract_tar_with_7z(sevenzip_bin, archive_path, output_dir, relative_to, skip_unused)
            return

        # Use WinRAR if 7-zip is not found
        winrar_cmd = extractors.get(ExtractorEnum.WINRAR)
        if winrar_cmd == USE_REGISTRY:
            try:
                winrar_cmd = str(_find_winrar_by_registry())
            except OSError:
                # Same as above: a failed registry lookup means WinRAR is unavailable
                winrar_cmd = None
        winrar_bin = _find_extractor_by_cmd(winrar_cmd)
        if winrar_bin is not None:
            _extract_tar_with_winrar(winrar_bin, archive_path, output_dir, relative_to, skip_unused)
            return
        get_logger().warning(
            'Neither 7-zip nor WinRAR were found. Falling back to Python extractor...')
    elif current_platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        tar_bin = _find_extractor_by_cmd(extractors.get(ExtractorEnum.TAR))
        if tar_bin is not None:
            _extract_tar_with_tar(tar_bin, archive_path, output_dir, relative_to, skip_unused)
            return
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(current_platform)
    # Fallback to Python-based extractor on all platforms
    _extract_tar_with_python(archive_path, output_dir, relative_to, skip_unused)
def extract_with_7z(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        skip_unused,
        extractors=None):
    """
    Extract archives with 7-zip into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    skip_unused is a boolean; if true, an exclusion switch is passed to 7-zip for every
        entry of CONTINGENT_PATHS.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. Defaults to DEFAULT_EXTRACTORS (USE_REGISTRY for 7-Zip).

    Raises Exception if the 7-zip binary cannot be found, if USE_REGISTRY is requested on a
    platform other than Windows, if output_dir / relative_to already exists, or if 7-zip
    exits with a non-zero status.
    """
    # TODO: It would be nice to extend this to support arbitrary standard IO chaining of 7z
    # instances, so _extract_tar_with_7z and other future formats could use this.
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
    if sevenzip_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for 7-zip is only available on Windows', sevenzip_cmd)
            raise Exception()
        sevenzip_cmd = str(_find_7z_by_registry())
    sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
    if sevenzip_bin is None:
        # Fail with a clear message instead of a TypeError when building the command line
        get_logger().error('Unable to find 7-zip binary: %s', sevenzip_cmd)
        raise Exception()

    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise Exception()
    cmd = (sevenzip_bin, 'x', str(archive_path), '-aoa', '-o{}'.format(str(output_dir)))
    if skip_unused:
        for cpath in CONTINGENT_PATHS:
            # cpath[:-1] drops the last character of the entry (presumably a trailing '/')
            cmd += ('-x!%s/%s' % (str(relative_to), cpath[:-1]), )
    get_logger().debug('7z command line: %s', ' '.join(cmd))

    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('7z command returned %s', result.returncode)
        raise Exception()

    _process_relative_to(output_dir, relative_to)
def extract_with_winrar(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        skip_unused,
        extractors=None):
    """
    Extract archives with WinRAR into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    skip_unused is a boolean; if true, an exclusion switch is passed to WinRAR for every
        entry of CONTINGENT_PATHS.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. Defaults to DEFAULT_EXTRACTORS (USE_REGISTRY for WinRAR).

    Raises Exception if the WinRAR binary cannot be found, if USE_REGISTRY is requested on
    a platform other than Windows, if output_dir / relative_to already exists, or if
    WinRAR exits with a non-zero status.
    """
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    winrar_cmd = extractors.get(ExtractorEnum.WINRAR)
    if winrar_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for WinRAR is only available on Windows', winrar_cmd)
            raise Exception()
        winrar_cmd = str(_find_winrar_by_registry())
    winrar_bin = _find_extractor_by_cmd(winrar_cmd)
    if winrar_bin is None:
        # Fail with a clear message instead of a TypeError when building the command line
        get_logger().error('Unable to find WinRAR binary: %s', winrar_cmd)
        raise Exception()

    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise Exception()
    cmd = (winrar_bin, 'x', '-o+', str(archive_path), str(output_dir))
    if skip_unused:
        for cpath in CONTINGENT_PATHS:
            # CONTINGENT_PATHS entries use '/' separators; convert them to the native one
            cmd += ('-x%s%s%s' % (str(relative_to), os.sep, cpath[:-1].replace('/', os.sep)), )
    get_logger().debug('WinRAR command line: %s', ' '.join(cmd))

    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('WinRAR command returned %s', result.returncode)
        raise Exception()

    _process_relative_to(output_dir, relative_to)
|