blob: 4ef5f6517ec4502f5f031320df32362d6a17f4d9 [file] [log] [blame]
# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
"""Extracts archives."""
16
17
David Benjamin0ec56392016-12-01 23:48:00 -050018import hashlib
David Benjamin0d5e0802015-02-27 17:23:16 -050019import optparse
David Benjamin1b5cfb52015-02-13 18:38:43 -050020import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
    """
    CheckedJoin returns os.path.join(output, path). It does sanity checks to
    ensure the resulting path is under output, but shouldn't be used on
    untrusted input.

    path is normalized first, so interior '..' components are collapsed before
    the checks run. A ValueError carrying the normalized path is raised when
    the path is absolute, carries a drive or UNC prefix, is empty or '.'
    (which would name output itself), or still climbs out of output via a
    leading '..' component. Ordinary names that merely begin with a dot, such
    as '.gitignore', are accepted.
    """
    path = os.path.normpath(path)
    # A drive-qualified relative path (e.g. 'C:foo' on Windows) is not
    # absolute, yet os.path.join would discard output when given one.
    if os.path.isabs(path) or os.path.splitdrive(path)[0]:
        raise ValueError(path)
    # After normpath, any remaining escape shows up as a leading '..'
    # component; a bare '.' means the path was empty or referred to output.
    if path in (os.curdir, os.pardir) or path.startswith(os.pardir + os.sep):
        raise ValueError(path)
    return os.path.join(output, path)
David Benjamin1b5cfb52015-02-13 18:38:43 -050038
39
class FileEntry(object):
    """A regular file found in an archive.

    path is the entry's name inside the archive, mode is the permission bits
    to apply after extraction (or None to leave them alone), and fileobj is a
    readable file-like object holding the entry's contents.
    """

    def __init__(self, path, mode, fileobj):
        self.path, self.mode, self.fileobj = path, mode, fileobj
45
46
class SymlinkEntry(object):
    """A symbolic link found in an archive.

    path is the link's name inside the archive, mode is the permission bits
    to apply after extraction (or None to leave them alone), and target is
    the path the link points at.
    """

    def __init__(self, path, mode, target):
        self.path, self.mode, self.target = path, mode, target
52
53
def IterateZip(path):
    """
    IterateZip opens the zip file at path and lazily yields a FileEntry for
    every member that is not a directory (directory members are those whose
    name ends in '/'). Entries carry no mode. The archive stays open until the
    generator is exhausted or closed.
    """
    with zipfile.ZipFile(path, 'r') as archive:
        for member in archive.infolist():
            if not member.filename.endswith('/'):
                yield FileEntry(member.filename, None, archive.open(member))
David Benjamin1b5cfb52015-02-13 18:38:43 -050064
65
def IterateTar(path, compression):
    """
    IterateTar opens the compressed tar file at path and lazily yields an
    entry object for each member. compression is the tarfile mode suffix (for
    example 'gz' or 'bz2') and is appended to 'r:'.

    Directories are skipped, symlinks become SymlinkEntry objects without a
    mode, and regular files become FileEntry objects carrying the member's
    mode. Any other member type raises ValueError.
    """
    with tarfile.open(path, 'r:' + compression) as archive:
        for member in archive:
            if member.isdir():
                continue
            if member.issym():
                yield SymlinkEntry(member.name, None, member.linkname)
                continue
            if not member.isfile():
                raise ValueError('Unknown entry type "%s"' % (member.name, ))
            yield FileEntry(member.name, member.mode,
                            archive.extractfile(member))
David Benjamin1b5cfb52015-02-13 18:38:43 -050081
82
def main(args):
    """Extracts ARCHIVE into OUTPUT unless OUTPUT is already up to date.

    args is the command line without the program name: optional flags followed
    by exactly two positionals, ARCHIVE and OUTPUT.

    A SHA-256 digest of the archive is compared against the stamp file
    OUTPUT/.boringssl_archive_digest; on a match nothing is done. Otherwise
    OUTPUT is removed if present, every entry is extracted (with the common
    leading directory stripped unless --no-prefix is given), and the stamp is
    rewritten.

    Returns 1 after printing help when the argument count is wrong, and 0
    otherwise, including when ARCHIVE does not exist.

    Raises ValueError for an unsupported archive extension, an entry path
    containing a backslash or starting with '/', an entry with no leading
    directory to strip, entries that disagree about the leading directory, or
    a path rejected by CheckedJoin. Raises TypeError for an entry that is
    neither a FileEntry nor a SymlinkEntry.
    """
    parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
    parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                      help='Do not remove a prefix from paths in the archive.')
    options, args = parser.parse_args(args)

    if len(args) != 2:
        parser.print_help()
        return 1

    archive, output = args

    if not os.path.exists(archive):
        # Skip archives that weren't downloaded.
        return 0

    # Hash the archive in 1 MiB chunks so it is never held in memory whole.
    with open(archive, 'rb') as f:
        sha256 = hashlib.sha256()
        while True:
            chunk = f.read(1024 * 1024)
            if not chunk:
                break
            sha256.update(chunk)
        digest = sha256.hexdigest()

    stamp_path = os.path.join(output, ".boringssl_archive_digest")
    if os.path.exists(stamp_path):
        with open(stamp_path) as f:
            if f.read().strip() == digest:
                print("Already up-to-date.")
                return 0

    if archive.endswith('.zip'):
        entries = IterateZip(archive)
    elif archive.endswith('.tar.gz'):
        entries = IterateTar(archive, 'gz')
    elif archive.endswith('.tar.bz2'):
        entries = IterateTar(archive, 'bz2')
    elif archive.endswith('.tar.xz'):
        entries = IterateTar(archive, 'xz')
    else:
        raise ValueError(archive)

    try:
        if os.path.exists(output):
            print("Removing %s" % (output, ))
            shutil.rmtree(output)

        print("Extracting %s to %s" % (archive, output))
        prefix = None
        num_extracted = 0
        for entry in entries:
            # Even on Windows, zip files must always use forward slashes.
            if '\\' in entry.path or entry.path.startswith('/'):
                raise ValueError(entry.path)

            if not options.no_prefix:
                new_prefix, sep, rest = entry.path.partition('/')
                # An entry with no '/' has no leading directory to strip; say
                # so explicitly instead of failing on tuple unpacking.
                if not sep:
                    raise ValueError(
                        'no prefix directory in path %r' % (entry.path, ))

                # Ensure the archive is consistent.
                if prefix is None:
                    prefix = new_prefix
                if prefix != new_prefix:
                    raise ValueError((prefix, new_prefix))
            else:
                rest = entry.path

            # Extract the file into the output directory.
            fixed_path = CheckedJoin(output, rest)
            if not os.path.isdir(os.path.dirname(fixed_path)):
                os.makedirs(os.path.dirname(fixed_path))
            if isinstance(entry, FileEntry):
                # Release each member's reader as soon as it has been copied
                # rather than leaving it open until the archive is closed.
                try:
                    with open(fixed_path, 'wb') as out:
                        shutil.copyfileobj(entry.fileobj, out)
                finally:
                    entry.fileobj.close()
            elif isinstance(entry, SymlinkEntry):
                os.symlink(entry.target, fixed_path)
            else:
                raise TypeError('unknown entry type')

            # Fix up permissions if need be.
            # TODO(davidben): To be extra tidy, this should only track the
            # execute bit as in git.
            if entry.mode is not None:
                os.chmod(fixed_path, entry.mode)

            # Print every 100 files, so bots do not time out on large archives.
            num_extracted += 1
            if num_extracted % 100 == 0:
                print("Extracted %d files..." % (num_extracted,))
    finally:
        # Closing the generator also closes the underlying archive.
        entries.close()

    # Only stamp after a complete extraction so a failed run is retried.
    with open(stamp_path, 'w') as f:
        f.write(digest)

    print("Done. Extracted %d files." % (num_extracted,))
    return 0
180
181
if __name__ == '__main__':
    # Run as a script: forward the command-line arguments (minus the program
    # name) to main and use its return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)