David Benjamin | 1b5cfb5 | 2015-02-13 18:38:43 -0500 | [diff] [blame^] | 1 | # Copyright (c) 2015, Google Inc. |
| 2 | # |
| 3 | # Permission to use, copy, modify, and/or distribute this software for any |
| 4 | # purpose with or without fee is hereby granted, provided that the above |
| 5 | # copyright notice and this permission notice appear in all copies. |
| 6 | # |
| 7 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 8 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| 10 | # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 14 | |
| 15 | """Extracts archives.""" |
| 16 | |
| 17 | |
| 18 | import os |
| 19 | import os.path |
| 20 | import tarfile |
| 21 | import shutil |
| 22 | import sys |
| 23 | import zipfile |
| 24 | |
| 25 | |
def FixPath(output, path):
  """
  FixPath removes the first directory from path and returns it together with
  the concatenation of output and the remainder. It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  Args:
    output: Directory the remainder of path is joined onto.
    path: Archive member name. It must use forward slashes and contain at
      least one '/' separating the leading directory from the remainder.

  Returns:
    A (first, fixed_path) tuple, where first is the stripped leading directory
    name and fixed_path is os.path.join(output, normalized remainder).

  Raises:
    ValueError: if path contains a backslash, starts with '/', has no '/'
      at all, or if the normalized remainder is absolute or starts with '.'.
      The last check rejects an empty remainder (which normalizes to '.'),
      '..' escapes, and also any remainder whose first component begins with
      a dot.
  """
  # Even on Windows, zip files must always use forward slashes.
  if '\\' in path or path.startswith('/'):
    raise ValueError(path)

  # partition() always returns three items, so a path without a separator is
  # reported with the offending path rather than an opaque unpacking error.
  first, separator, rest = path.partition('/')
  if not separator:
    raise ValueError(path)

  rest = os.path.normpath(rest)
  if os.path.isabs(rest) or rest.startswith('.'):
    raise ValueError(rest)
  return first, os.path.join(output, rest)
| 41 | |
| 42 | |
def IterateZip(path):
  """
  IterateZip opens the zip file at path and returns a generator of
  (filename, mode, fileobj) tuples for each file in it.

  Directory entries (names ending in '/') are skipped so that only regular
  files are yielded. mode is always None for zip members. The archive is
  opened inside the generator and closed when the generator finishes or is
  closed, so each fileobj should be consumed before advancing.
  """
  with zipfile.ZipFile(path, 'r') as zip_file:
    for info in zip_file.infolist():
      # Zip archives record directories as members whose name ends in a
      # forward slash; they carry no file data to extract.
      if info.filename.endswith('/'):
        continue
      yield (info.filename, None, zip_file.open(info))
| 51 | |
| 52 | |
def IterateTar(path):
  """
  IterateTar opens the gzip-compressed tar file at path and returns a
  generator of (filename, mode, fileobj) tuples, one per regular file.

  Directory members are silently skipped. Any member that is neither a
  directory nor a regular file causes a ValueError naming that member.
  """
  with tarfile.open(path, 'r:gz') as archive:
    for member in archive:
      if member.isdir():
        # Directories carry no data to extract.
        continue
      if member.isfile():
        contents = archive.extractfile(member)
        yield (member.name, member.mode, contents)
        continue
      raise ValueError('Unknown entry type "%s"' % (member.name, ))
| 65 | |
| 66 | |
def main(args):
  """
  main extracts an archive into an output directory, stripping the single
  top-level directory that every archive member is required to share.

  Args:
    args: Command-line style list of exactly [program, ARCHIVE, OUTPUT].
      ARCHIVE must end in '.zip' or '.tar.gz'. OUTPUT is removed first if it
      already exists.

  Returns:
    1 if the argument count is wrong (a usage line is written to stderr),
    0 if the archive does not exist (nothing is done) or extraction succeeds.

  Raises:
    ValueError: if the archive has an unsupported extension, if members do
      not all share the same first directory, or if a member path is rejected
      by FixPath.
  """
  if len(args) != 3:
    sys.stderr.write('Usage: %s ARCHIVE OUTPUT\n' % (args[0],))
    return 1

  _, archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive)
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    for path, mode, inp in entries:
      try:
        # Pivot the path onto the output directory.
        new_prefix, fixed_path = FixPath(output, path)

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))

        # Extract the file. Archive members are raw bytes, so the output must
        # be opened in binary mode to avoid newline translation on Windows.
        parent = os.path.dirname(fixed_path)
        if not os.path.isdir(parent):
          os.makedirs(parent)
        with open(fixed_path, 'wb') as out:
          # Stream in chunks rather than holding the whole member in memory.
          shutil.copyfileobj(inp, out)
      finally:
        # Release the per-member handle even if extraction fails.
        inp.close()

      # Fix up permissions if needbe.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if mode is not None:
        os.chmod(fixed_path, mode)
  finally:
    # Closing the generator closes the underlying archive.
    entries.close()

  return 0
| 117 | |
| 118 | |
if __name__ == '__main__':
  # Run only when executed as a script; main()'s return value becomes the
  # process exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)