|  | # Copyright 2015 The BoringSSL Authors | 
|  | # | 
|  | # Permission to use, copy, modify, and/or distribute this software for any | 
|  | # purpose with or without fee is hereby granted, provided that the above | 
|  | # copyright notice and this permission notice appear in all copies. | 
|  | # | 
|  | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
|  | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
|  | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | 
|  | # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
|  | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | 
|  | # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | 
|  | # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
|  |  | 
|  | """Extracts archives.""" | 
|  |  | 
|  |  | 
|  | import hashlib | 
|  | import optparse | 
|  | import os | 
|  | import os.path | 
|  | import tarfile | 
|  | import shutil | 
|  | import sys | 
|  | import zipfile | 
|  |  | 
|  |  | 
def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  Args:
    output: The directory the result is expected to stay underneath.
    path: A relative path; it is normalized with os.path.normpath first.

  Returns:
    os.path.join(output, normalized path).

  Raises:
    ValueError: If path is absolute, carries a drive, normalizes to '.' (i.e.
      names no file at all), or climbs out of output through a leading '..'
      component.
  """
  path = os.path.normpath(path)
  # os.path.join discards output when the second argument is absolute or (on
  # Windows) names a drive, so neither may be allowed through.
  if os.path.isabs(path) or os.path.splitdrive(path)[0]:
    raise ValueError(path)
  # After normalization any surviving '..' sits at the front of the path, so
  # inspecting the leading component is sufficient. Only whole '.' and '..'
  # components are rejected; ordinary dotfiles such as '.gitignore' are valid
  # archive members and must be accepted.
  if path in (os.curdir, os.pardir) or path.startswith(os.pardir + os.sep):
    raise ValueError(path)
  return os.path.join(output, path)
|  |  | 
|  |  | 
class FileEntry(object):
  """
  FileEntry describes one regular file found in an archive.

  path is the entry's name as recorded in the archive, mode is the permission
  value to apply after extraction (or None to leave permissions untouched), and
  fileobj is a readable object holding the file's contents.
  """

  def __init__(self, path, mode, fileobj):
    self.path, self.mode, self.fileobj = path, mode, fileobj
|  |  | 
|  |  | 
class SymlinkEntry(object):
  """
  SymlinkEntry describes one symbolic link found in an archive.

  path is the link's own name as recorded in the archive, mode is the
  permission value to apply after extraction (or None to leave permissions
  untouched), and target is the path the link points at.
  """

  def __init__(self, path, mode, target):
    self.path, self.mode, self.target = path, mode, target
|  |  | 
|  |  | 
def IterateZip(path):
  """
  IterateZip opens the zip file at path and returns a generator of entry objects
  for each file in it.

  Entries whose name ends in '/' (directories) are skipped. Every yielded
  FileEntry has mode None, so callers leave the extracted file's permissions
  alone.

  The fileobj of a yielded entry is closed as soon as the generator is advanced
  or closed, so its contents must be consumed before requesting the next entry.
  """
  with zipfile.ZipFile(path, 'r') as zip_file:
    for info in zip_file.infolist():
      if info.filename.endswith('/'):
        continue
      fileobj = zip_file.open(info)
      try:
        yield FileEntry(info.filename, None, fileobj)
      finally:
        # Release the member stream deterministically rather than leaving it
        # open until garbage collection. This also runs when the generator is
        # closed while suspended at the yield above.
        fileobj.close()
|  |  | 
|  |  | 
def IterateTar(path, compression):
  """
  IterateTar opens the compressed tar file at path and returns a generator of
  entry objects for each file in it.

  compression is appended to 'r:' to form the tarfile open mode, e.g. 'gz',
  'bz2' or 'xz'.

  Directories are skipped, symlinks are yielded as SymlinkEntry (with mode
  None) and regular files as FileEntry carrying the mode stored in the archive.
  Any other kind of member raises ValueError.

  The fileobj of a yielded FileEntry is closed as soon as the generator is
  advanced or closed, so its contents must be consumed before requesting the
  next entry.
  """
  with tarfile.open(path, 'r:' + compression) as tar_file:
    for info in tar_file:
      if info.isdir():
        continue
      if info.issym():
        yield SymlinkEntry(info.name, None, info.linkname)
      elif info.isfile():
        fileobj = tar_file.extractfile(info)
        try:
          yield FileEntry(info.name, info.mode, fileobj)
        finally:
          # Release the member stream deterministically rather than leaving it
          # open until garbage collection. This also runs when the generator
          # is closed while suspended at the yield above.
          fileobj.close()
      else:
        raise ValueError('Unknown entry type "%s"' % (info.name, ))
|  |  | 
|  |  | 
def _Sha256OfFile(path):
  """Returns the hex SHA-256 digest of the file at path, read in 1 MiB chunks."""
  sha256 = hashlib.sha256()
  with open(path, 'rb') as f:
    while True:
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha256.update(chunk)
  return sha256.hexdigest()


def _OpenArchive(archive):
  """Returns the entry generator matching archive's file extension."""
  if archive.endswith('.zip'):
    return IterateZip(archive)
  for suffix, compression in (('.tar.gz', 'gz'),
                              ('.tar.bz2', 'bz2'),
                              ('.tar.xz', 'xz')):
    if archive.endswith(suffix):
      return IterateTar(archive, compression)
  raise ValueError(archive)


def main(args):
  """
  main extracts an archive into an output directory.

  args is the command line without the program name: ARCHIVE and OUTPUT, plus
  the optional --no-prefix flag.

  The archive's SHA-256 digest is compared with the one recorded in
  OUTPUT/.boringssl_archive_digest. If they match, nothing is done. Otherwise
  OUTPUT is removed, the archive is extracted into it and the digest is
  recorded. Unless --no-prefix is given, every entry must live under one common
  top-level directory, which is stripped from the extracted paths.

  Returns 1 when the arguments are malformed and 0 otherwise, including when
  ARCHIVE does not exist.

  Raises ValueError for an unsupported archive extension, for entry paths that
  contain a backslash or start with '/', for entries that are not under the
  common top-level directory, and for paths rejected by CheckedJoin. Raises
  TypeError for an entry object of an unknown class.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  digest = _Sha256OfFile(archive)

  # The stamp records which archive the output directory was extracted from,
  # so an unchanged archive is not extracted again.
  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  entries = _OpenArchive(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        new_prefix, sep, rest = entry.path.partition('/')
        if not sep:
          # A file sitting at the top level of the archive has no prefix to
          # strip. Report that clearly instead of failing on tuple unpacking.
          raise ValueError(
              'Entry "%s" is not inside a top-level directory' % (entry.path, ))

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      parent = os.path.dirname(fixed_path)
      if not os.path.isdir(parent):
        os.makedirs(parent)
      if isinstance(entry, FileEntry):
        with open(fixed_path, 'wb') as out:
          shutil.copyfileobj(entry.fileobj, out)
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if need be.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    # Closing the generator releases the underlying archive file even when
    # extraction stops early.
    entries.close()

  # Only record the digest once everything was extracted successfully, so a
  # failed run is retried next time.
  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
|  |  | 
|  |  | 
if __name__ == '__main__':
  # Run as a script: pass main() the arguments after the program name and use
  # its return value as the process exit status.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)