diff --git a/lib/zipp/__init__.py b/lib/zipp/__init__.py index d65297b8..161a4fb1 100644 --- a/lib/zipp/__init__.py +++ b/lib/zipp/__init__.py @@ -1,3 +1,12 @@ +""" +A Path-like interface for zipfiles. + +This codebase is shared between zipfile.Path in the stdlib +and zipp in PyPI. See +https://github.com/python/importlib_metadata/wiki/Development-Methodology +for more detail. +""" + import io import posixpath import zipfile @@ -37,7 +46,7 @@ def _parents(path): def _ancestry(path): """ Given a path with elements separated by - posixpath.sep, generate all elements of that path + posixpath.sep, generate all elements of that path. >>> list(_ancestry('b/d')) ['b/d', 'b'] @@ -49,9 +58,14 @@ def _ancestry(path): ['b'] >>> list(_ancestry('')) [] + + Multiple separators are treated like a single. + + >>> list(_ancestry('//b//d///f//')) + ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: + while path.rstrip(posixpath.sep): yield path path, tail = posixpath.split(path) @@ -86,69 +100,7 @@ class InitializedState: super().__init__(*args, **kwargs) -class SanitizedNames: - """ - ZipFile mix-in to ensure names are sanitized. - """ - - def namelist(self): - return list(map(self._sanitize, super().namelist())) - - @staticmethod - def _sanitize(name): - r""" - Ensure a relative path with posix separators and no dot names. - - Modeled after - https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 - but provides consistent cross-platform behavior. - - >>> san = SanitizedNames._sanitize - >>> san('/foo/bar') - 'foo/bar' - >>> san('//foo.txt') - 'foo.txt' - >>> san('foo/.././bar.txt') - 'foo/bar.txt' - >>> san('foo../.bar.txt') - 'foo../.bar.txt' - >>> san('\\foo\\bar.txt') - 'foo/bar.txt' - >>> san('D:\\foo.txt') - 'D/foo.txt' - >>> san('\\\\server\\share\\file.txt') - 'server/share/file.txt' - >>> san('\\\\?\\GLOBALROOT\\Volume3') - '?/GLOBALROOT/Volume3' - >>> san('\\\\.\\PhysicalDrive1\\root') - 'PhysicalDrive1/root' - - Retain any trailing slash. - >>> san('abc/') - 'abc/' - - Raises a ValueError if the result is empty. - >>> san('../..') - Traceback (most recent call last): - ... - ValueError: Empty filename - """ - - def allowed(part): - return part and part not in {'..', '.'} - - # Remove the drive letter. - # Don't use ntpath.splitdrive, because that also strips UNC paths - bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) - clean = bare.replace('\\', '/') - parts = clean.split('/') - joined = '/'.join(filter(allowed, parts)) - if not joined: - raise ValueError("Empty filename") - return joined + '/' * name.endswith('/') - - -class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile): +class CompleteDirs(InitializedState, zipfile.ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. @@ -329,7 +281,7 @@ class Path: >>> str(path.parent) 'mem' - If the zipfile has no filename, such attributes are not + If the zipfile has no filename, such attributes are not valid and accessing them will raise an Exception. >>> zf.filename = None @@ -470,8 +422,7 @@ class Path: prefix = re.escape(self.at) tr = Translator(seps='/') matches = re.compile(prefix + tr.translate(pattern)).fullmatch - names = (data.filename for data in self.root.filelist) - return map(self._next, filter(matches, names)) + return map(self._next, filter(matches, self.root.namelist())) def rglob(self, pattern): return self.glob(f'**/{pattern}') diff --git a/lib/zipp/compat/overlay.py b/lib/zipp/compat/overlay.py new file mode 100644 index 00000000..5a97ee7c --- /dev/null +++ b/lib/zipp/compat/overlay.py @@ -0,0 +1,37 @@ +""" +Expose zipp.Path as .zipfile.Path. + +Includes everything else in ``zipfile`` to match future usage. Just +use: + +>>> from zipp.compat.overlay import zipfile + +in place of ``import zipfile``. + +Relative imports are supported too. + +>>> from zipp.compat.overlay.zipfile import ZipInfo + +The ``zipfile`` object added to ``sys.modules`` needs to be +hashable (#126). + +>>> _ = hash(sys.modules['zipp.compat.overlay.zipfile']) +""" + +import importlib +import sys +import types + +import zipp + + +class HashableNamespace(types.SimpleNamespace): + def __hash__(self): + return hash(tuple(vars(self))) + + +zipfile = HashableNamespace(**vars(importlib.import_module('zipfile'))) +zipfile.Path = zipp.Path +zipfile._path = zipp + +sys.modules[__name__ + '.zipfile'] = zipfile # type: ignore[assignment] diff --git a/lib/zipp/compat/py310.py b/lib/zipp/compat/py310.py index d5ca53e0..8264a482 100644 --- a/lib/zipp/compat/py310.py +++ b/lib/zipp/compat/py310.py @@ -7,5 +7,7 @@ def _text_encoding(encoding, stacklevel=2, /): # pragma: no cover text_encoding = ( - io.text_encoding if sys.version_info > (3, 10) else _text_encoding # type: ignore + io.text_encoding # type: ignore[unused-ignore, attr-defined] + if sys.version_info > (3, 10) + else _text_encoding ) diff --git a/lib/zipp/glob.py b/lib/zipp/glob.py index 69c41d77..4320f1c0 100644 --- a/lib/zipp/glob.py +++ b/lib/zipp/glob.py @@ -28,7 +28,7 @@ class Translator: """ Given a glob pattern, produce a regex that matches it. """ - return self.extend(self.translate_core(pattern)) + return self.extend(self.match_dirs(self.translate_core(pattern))) def extend(self, pattern): r""" @@ -41,6 +41,14 @@ class Translator: """ return rf'(?s:{pattern})\Z' + def match_dirs(self, pattern): + """ + Ensure that zipfile.Path directory names are matched. + + zipfile.Path directory names always end in a slash. + """ + return rf'{pattern}[/]?' + def translate_core(self, pattern): r""" Given a glob pattern, produce a regex that matches it.