diff --git a/lib/packaging/__about__.py b/lib/packaging/__about__.py index 3551bc2d..1391863f 100644 --- a/lib/packaging/__about__.py +++ b/lib/packaging/__about__.py @@ -17,7 +17,7 @@ __title__ = "packaging" __summary__ = "Core utilities for Python packages" __uri__ = "https://github.com/pypa/packaging" -__version__ = "21.3" +__version__ = "22.0" __author__ = "Donald Stufft and individual contributors" __email__ = "donald@stufft.io" diff --git a/lib/packaging/_elffile.py b/lib/packaging/_elffile.py new file mode 100644 index 00000000..9fb5984b --- /dev/null +++ b/lib/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianess). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/lib/packaging/_manylinux.py b/lib/packaging/_manylinux.py index 4c379aa6..2f0cc743 100644 --- a/lib/packaging/_manylinux.py +++ b/lib/packaging/_manylinux.py @@ -1,121 +1,58 @@ import collections +import contextlib import functools import os import re -import struct import sys import warnings -from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 -# Python does not provide platform information at sufficient granularity to -# identify the architecture of the running executable in some cases, so we -# determine it dynamically by reading the information from the running -# process. This only applies on Linux, which uses the ELF format. -class _ELFFileHeader: - # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header - class _InvalidELFFileHeader(ValueError): - """ - An invalid ELF file header was found. - """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file: IO[bytes]) -> None: - def unpack(fmt: str) -> int: - try: - data = file.read(struct.calcsize(fmt)) - result: Tuple[int, ...] = struct.unpack(fmt, data) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result[0] - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "H" - format_i = "I" - format_q = "Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header() -> Optional[_ELFFileHeader]: +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: try: - with open(sys.executable, "rb") as f: - elf_header = _ELFFileHeader(f) - except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): - return None - return elf_header + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None -def _is_linux_armhf() -> bool: +def _is_linux_armhf(executable: str) -> bool: # hard-float ABI can be detected from the ELF header of the running # process # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_ARM - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABIMASK - ) == elf_header.EF_ARM_ABI_VER5 - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD - ) == elf_header.EF_ARM_ABI_FLOAT_HARD - return result + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) -def _is_linux_i686() -> bool: - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_386 - return result +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) -def _have_compatible_abi(arch: str) -> bool: +def _have_compatible_abi(executable: str, arch: str) -> bool: if arch == "armv7l": - return _is_linux_armhf() + return _is_linux_armhf(executable) if arch == "i686": - return _is_linux_i686() + return _is_linux_i686(executable) return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} @@ -141,10 +78,10 @@ def _glibc_version_string_confstr() -> Optional[str]: # platform module. # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 try: - # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". - version_string = os.confstr("CS_GNU_LIBC_VERSION") + # Should be a string like "glibc 2.17". + version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION") assert version_string is not None - _, version = version_string.split() + _, version = version_string.rsplit() except (AssertionError, AttributeError, OSError, ValueError): # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... return None @@ -211,8 +148,8 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]: m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) if not m: warnings.warn( - "Expected glibc version with 2 components major.minor," - " got: %s" % version_str, + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", RuntimeWarning, ) return -1, -1 @@ -265,7 +202,7 @@ _LEGACY_MANYLINUX_MAP = { def platform_tags(linux: str, arch: str) -> Iterator[str]: - if not _have_compatible_abi(arch): + if not _have_compatible_abi(sys.executable, arch): return # Oldest glibc to be supported regardless of architecture is (2, 17). too_old_glibc2 = _GLibCVersion(2, 16) diff --git a/lib/packaging/_musllinux.py b/lib/packaging/_musllinux.py index 8ac3059b..706ba600 100644 --- a/lib/packaging/_musllinux.py +++ b/lib/packaging/_musllinux.py @@ -4,68 +4,13 @@ This module implements logic to detect if the currently running Python is linked against musl, and what musl version is used. """ -import contextlib import functools -import operator -import os import re -import struct import subprocess import sys -from typing import IO, Iterator, NamedTuple, Optional, Tuple +from typing import Iterator, NamedTuple, Optional - -def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: - return struct.unpack(fmt, f.read(struct.calcsize(fmt))) - - -def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: - """Detect musl libc location by parsing the Python executable. - - Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca - ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html - """ - f.seek(0) - try: - ident = _read_unpacked(f, "16B") - except struct.error: - return None - if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. - return None - f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. - - try: - # e_fmt: Format for program header. - # p_fmt: Format for section header. - # p_idx: Indexes to find p_type, p_offset, and p_filesz. - e_fmt, p_fmt, p_idx = { - 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. - 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. - }[ident[4]] - except KeyError: - return None - else: - p_get = operator.itemgetter(*p_idx) - - # Find the interpreter section and return its content. - try: - _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) - except struct.error: - return None - for i in range(e_phnum + 1): - f.seek(e_phoff + e_phentsize * i) - try: - p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) - except struct.error: - return None - if p_type != 3: # Not PT_INTERP. - continue - f.seek(p_offset) - interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") - if "musl" not in interpreter: - return None - return interpreter - return None +from ._elffile import ELFFile class _MuslVersion(NamedTuple): @@ -95,13 +40,12 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]: Version 1.2.2 Dynamic Program Loader """ - with contextlib.ExitStack() as stack: - try: - f = stack.enter_context(open(executable, "rb")) - except OSError: - return None - ld = _parse_ld_musl_from_elf(f) - if not ld: + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: return None proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) return _parse_musl_version(proc.stderr) diff --git a/lib/packaging/_parser.py b/lib/packaging/_parser.py new file mode 100644 index 00000000..4dcf03d1 --- /dev/null +++ b/lib/packaging/_parser.py @@ -0,0 +1,333 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not suport recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + else: + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (OP VERSION (COMMA OP VERSION)*)? + """ + parsed_specifiers = "" + while tokenizer.check("OP"): + parsed_specifiers += tokenizer.read().text + + # We intentionally do not consume whitespace here, since the regular expression + # for `VERSION` uses a lookback for the operator, to determine what + # corresponding syntax is permitted. + + version_token = tokenizer.expect("VERSION", expected="version after operator") + parsed_specifiers += version_token.text + tokenizer.consume("WS") + + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/lib/packaging/_tokenizer.py b/lib/packaging/_tokenizer.py new file mode 100644 index 00000000..de389cb3 --- /dev/null +++ b/lib/packaging/_tokenizer.py @@ -0,0 +1,186 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LPAREN": r"\s*\(", + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "VERSION": re.compile(Specifier._version_regex_str, re.VERBOSE | re.IGNORECASE), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield open_position is not None + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected closing {close_token}", + span_start=open_position, + ) + + self.read() diff --git a/lib/packaging/markers.py b/lib/packaging/markers.py index cb640e8f..a5bf35c6 100644 --- a/lib/packaging/markers.py +++ b/lib/packaging/markers.py @@ -8,19 +8,10 @@ import platform import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, -) - +from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker +from ._tokenizer import ParserSyntaxError from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -52,101 +43,24 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -192,7 +106,7 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: @@ -201,25 +115,19 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: return oper(lhs, rhs) -class Undefined: - pass +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + + # other environment markers don't have such standards + return values -_undefined = Undefined() - - -def _get_env(environment: Dict[str, str], name: str) -> str: - value: Union[str, Undefined] = environment.get(name, _undefined) - - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - f"{name!r} does not exist in evaluation environment." - ) - - return value - - -def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: groups: List[List[bool]] = [[]] for marker in markers: @@ -231,12 +139,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: lhs, op, rhs = marker if isinstance(lhs, Variable): - lhs_value = _get_env(environment, lhs.value) + environment_key = lhs.value + lhs_value = environment[environment_key] rhs_value = rhs.value else: lhs_value = lhs.value - rhs_value = _get_env(environment, rhs.value) + environment_key = rhs.value + rhs_value = environment[environment_key] + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) groups[-1].append(_eval_op(lhs_value, op, rhs_value)) else: assert marker in ["and", "or"] @@ -274,13 +185,29 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) - except ParseException as e: - raise InvalidMarker( - f"Invalid marker: {marker!r}, parse error at " - f"{marker[e.loc : e.loc + 8]!r}" - ) + self._markers = _normalize_extra_values(parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e def __str__(self) -> str: return _format_marker(self._markers) @@ -288,6 +215,15 @@ class Marker: def __repr__(self) -> str: return f"" + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. @@ -298,6 +234,7 @@ class Marker: The environment is determined from the current Python process. """ current_environment = default_environment() + current_environment["extra"] = "" if environment is not None: current_environment.update(environment) diff --git a/lib/packaging/requirements.py b/lib/packaging/requirements.py index 53f9a3aa..a9f9b9c7 100644 --- a/lib/packaging/requirements.py +++ b/lib/packaging/requirements.py @@ -2,26 +2,13 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -import re -import string import urllib.parse -from typing import List, Optional as TOptional, Set +from typing import Any, List, Optional, Set -from pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from ._parser import parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet class InvalidRequirement(ValueError): @@ -30,60 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. @@ -99,28 +32,29 @@ class Requirement: def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' - ) + parsed = parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e - self.name: str = req.name - if req.url: - parsed_url = urllib.parse.urlparse(req.url) + self.name: str = parsed.name + if parsed.url: + parsed_url = urllib.parse.urlparse(parsed.url) if parsed_url.scheme == "file": - if urllib.parse.urlunparse(parsed_url) != req.url: + if urllib.parse.urlunparse(parsed_url) != parsed.url: raise InvalidRequirement("Invalid URL given") elif not (parsed_url.scheme and parsed_url.netloc) or ( not parsed_url.scheme and not parsed_url.netloc ): - raise InvalidRequirement(f"Invalid URL: {req.url}") - self.url: TOptional[str] = req.url + raise InvalidRequirement(f"Invalid URL: {parsed.url}") + self.url: Optional[str] = parsed.url else: self.url = None - self.extras: Set[str] = set(req.extras.asList() if req.extras else []) - self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: TOptional[Marker] = req.marker if req.marker else None + self.extras: Set[str] = set(parsed.extras if parsed.extras else []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) def __str__(self) -> str: parts: List[str] = [self.name] @@ -144,3 +78,18 @@ class Requirement: def __repr__(self) -> str: return f"" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + self.name == other.name + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/lib/packaging/specifiers.py b/lib/packaging/specifiers.py index 0e218a6f..645214ae 100644 --- a/lib/packaging/specifiers.py +++ b/lib/packaging/specifiers.py @@ -1,38 +1,40 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" import abc -import functools import itertools import re -import warnings -from typing import ( - Callable, - Dict, - Iterable, - Iterator, - List, - Optional, - Pattern, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Callable, Iterable, Iterator, List, Optional, Set, Tuple, Union from .utils import canonicalize_version -from .version import LegacyVersion, Version, parse +from .version import Version -ParsedVersion = Union[Version, LegacyVersion] -UnparsedVersion = Union[Version, LegacyVersion, str] -VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) -CallableOperator = Callable[[ParsedVersion, str], bool] +UnparsedVersion = Union[Version, str] +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ @@ -40,35 +42,39 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. """ @abc.abstractmethod def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. """ - @abc.abstractproperty + @property + @abc.abstractmethod def prereleases(self) -> Optional[bool]: - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter def prereleases(self, value: bool) -> None: - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod @@ -79,227 +85,28 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersion], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersion]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. """ -class _IndividualSpecifier(BaseSpecifier): +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. - _operators: Dict[str, str] = {} - _regex: Pattern[str] + .. tip:: - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - self._spec: Tuple[str, str] = ( - match.group("operator").strip(), - match.group("version").strip(), - ) - - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases - - def __repr__(self) -> str: - pre = ( - f", prereleases={self.prereleases!r}" - if self._prereleases is not None - else "" - ) - - return f"<{self.__class__.__name__}({str(self)!r}{pre})>" - - def __str__(self) -> str: - return "{}{}".format(*self._spec) - - @property - def _canonical_spec(self) -> Tuple[str, str]: - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self) -> int: - return hash(self._canonical_spec) - - def __eq__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def _get_operator(self, op: str) -> CallableOperator: - operator_callable: CallableOperator = getattr( - self, f"_compare_{self._operators[op]}" - ) - return operator_callable - - def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version - - @property - def operator(self) -> str: - return self._spec[0] - - @property - def version(self) -> str: - return self._spec[1] - - @property - def prereleases(self) -> Optional[bool]: - return self._prereleases - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - - def __contains__(self, item: str) -> bool: - return self.contains(item) - - def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None - ) -> bool: - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. - operator_callable: CallableOperator = self._get_operator(self.operator) - return operator_callable(normalized_item, self.version) - - def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later in case nothing - # else matches this specifier. - if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. - if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P(==|!=|<=|>=|<|>)) - \s* - (?P - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - super().__init__(spec, prereleases) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal( - self, prospective: LegacyVersion, spec: str - ) -> bool: - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn: Callable[["Specifier", ParsedVersion, str], bool] -) -> Callable[["Specifier", ParsedVersion, str], bool]: - @functools.wraps(fn) - def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: - if not isinstance(prospective, Version): - return False - return fn(self, prospective, spec) - - return wrapped - - -class Specifier(_IndividualSpecifier): - - _regex_str = r""" + _operator_regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P (?: # The identity operators allow for an escape hatch that will @@ -309,8 +116,10 @@ class Specifier(_IndividualSpecifier): # but included entirely as an escape hatch. (?<====) # Only match for the identity operator \s* - [^\s]* # We just match everything, except for whitespace - # since we are only testing for strict identity. + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. ) | (?: @@ -323,23 +132,23 @@ class Specifier(_IndividualSpecifier): v? (?:[0-9]+!)? # epoch [0-9]+(?:\.[0-9]+)* # release - (?: # pre release - [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) - [-_\.]? - [0-9]* - )? - (?: # post release - (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) - )? - # You cannot use a wild card and a dev or local version - # together so group them with a | and make them optional. + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (a|b|c|rc|alpha|beta|pre|preview) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local - | - \.\* # Wild card syntax of .* )? ) | @@ -391,7 +200,10 @@ class Specifier(_IndividualSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", @@ -404,8 +216,152 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } - @_require_version_compare - def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + @property + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -426,34 +382,33 @@ class Specifier(_IndividualSpecifier): prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. - prospective = Version(prospective.public) + normalized_prospective = canonicalize_version(prospective.public) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(normalized_spec) # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - split_prospective = _version_split(str(prospective)) + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] + shortened_prospective = padded_prospective[: len(split_spec)] - # Pad out our two sides with zeros so that they both equal the same - # length. - padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) - - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -466,30 +421,24 @@ class Specifier(_IndividualSpecifier): return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal( - self, prospective: ParsedVersion, spec: str - ) -> bool: + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -514,8 +463,7 @@ class Specifier(_IndividualSpecifier): # version in the spec. return True - @_require_version_compare - def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -549,34 +497,133 @@ class Specifier(_IndividualSpecifier): def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self) -> bool: + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. - if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersion], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersion]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later incase nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") @@ -618,22 +665,39 @@ def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + def __init__( self, specifiers: str = "", prereleases: Optional[bool] = None ) -> None: + """Initialize a SpecifierSet instance. - # Split on , to break each individual specifier into it's own item, and + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + # Split on `,` to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a - # Specifier and falling back to a LegacySpecifier. - parsed: Set[_IndividualSpecifier] = set() + # Specifier. + parsed: Set[Specifier] = set() for specifier in split_specifiers: - try: - parsed.add(Specifier(specifier)) - except InvalidSpecifier: - parsed.add(LegacySpecifier(specifier)) + parsed.add(Specifier(specifier)) # Turn our parsed specifiers into a frozen set and save them for later. self._specs = frozenset(parsed) @@ -642,7 +706,40 @@ class SpecifierSet(BaseSpecifier): # we accept prereleases or not. self._prereleases = prereleases + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ pre = ( f", prereleases={self.prereleases!r}" if self._prereleases is not None @@ -652,12 +749,31 @@ class SpecifierSet(BaseSpecifier): return f"" def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ return ",".join(sorted(str(s) for s in self._specs)) def __hash__(self) -> int: return hash(self._specs) def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): @@ -681,7 +797,25 @@ class SpecifierSet(BaseSpecifier): return specifier def __eq__(self, other: object) -> bool: - if isinstance(other, (str, _IndividualSpecifier)): + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -689,43 +823,72 @@ class SpecifierSet(BaseSpecifier): return self._specs == other._specs def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" return len(self._specs) - def __iter__(self) -> Iterator[_IndividualSpecifier]: + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. + + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ return iter(self._specs) - @property - def prereleases(self) -> Optional[bool]: - - # If we have been given an explicit prerelease modifier, then we'll - # pass that through here. - if self._prereleases is not None: - return self._prereleases - - # If we don't have any specifiers, and we don't have a forced value, - # then we'll just return None since we don't know if this should have - # pre-releases or not. - if not self._specs: - return None - - # Otherwise we'll see if any of the given specifiers accept - # prereleases, if any of them do we'll return True, otherwise False. - return any(s.prereleases for s in self._specs) - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ return self.contains(item) def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. - # Ensure that our item is a Version or LegacyVersion instance. - if not isinstance(item, (LegacyVersion, Version)): - item = parse(item) + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the @@ -742,6 +905,9 @@ class SpecifierSet(BaseSpecifier): if not prereleases and item.is_prerelease: return False + if installed and item.is_prerelease: + item = Version(item.base_version) + # We simply dispatch to the underlying specs here to make sure that the # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers @@ -749,9 +915,46 @@ class SpecifierSet(BaseSpecifier): return all(s.contains(item, prereleases=prereleases) for s in self._specs) def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersion], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersion]: + """Filter items in the given iterable, that match the specifiers in this set. + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -764,27 +967,16 @@ class SpecifierSet(BaseSpecifier): if self._specs: for spec in self._specs: iterable = spec.filter(iterable, prereleases=bool(prereleases)) - return iterable + return iter(iterable) # If we do not have any specifiers, then we need to have a rough filter # which will filter out any pre-releases, unless there are no final - # releases, and which will filter out LegacyVersion in general. + # releases. else: - filtered: List[VersionTypeVar] = [] - found_prereleases: List[VersionTypeVar] = [] - - item: UnparsedVersion - parsed_version: Union[Version, LegacyVersion] + filtered: List[UnparsedVersion] = [] + found_prereleases: List[UnparsedVersion] = [] for item in iterable: - # Ensure that we some kind of Version class for this item. - if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -797,6 +989,6 @@ class SpecifierSet(BaseSpecifier): # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/lib/packaging/tags.py b/lib/packaging/tags.py index 9a3d25a7..a0e1ea23 100644 --- a/lib/packaging/tags.py +++ b/lib/packaging/tags.py @@ -4,6 +4,7 @@ import logging import platform +import subprocess import sys import sysconfig from importlib.machinery import EXTENSION_SUFFIXES @@ -36,7 +37,7 @@ INTERPRETER_SHORT_NAMES: Dict[str, str] = { } -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = sys.maxsize <= 2**32 class Tag: @@ -356,6 +357,22 @@ def mac_platforms( version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + universal_newlines=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: @@ -482,6 +499,9 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]: yield from generic_tags() if interp_name == "pp": - yield from compatible_tags(interpreter="pp3") + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) else: - yield from compatible_tags() + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/lib/packaging/utils.py b/lib/packaging/utils.py index bab11b80..33c613b7 100644 --- a/lib/packaging/utils.py +++ b/lib/packaging/utils.py @@ -35,7 +35,9 @@ def canonicalize_name(name: str) -> NormalizedName: return cast(NormalizedName, value) -def canonicalize_version(version: Union[Version, str]) -> str: +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. @@ -56,8 +58,11 @@ def canonicalize_version(version: Union[Version, str]) -> str: parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release if parsed.pre is not None: diff --git a/lib/packaging/version.py b/lib/packaging/version.py index de9a09a4..e5c738cf 100644 --- a/lib/packaging/version.py +++ b/lib/packaging/version.py @@ -1,16 +1,20 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" import collections import itertools import re -import warnings -from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union +from typing import Callable, Optional, SupportsInt, Tuple, Union from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] InfiniteTypes = Union[InfinityType, NegativeInfinityType] PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] @@ -29,36 +33,37 @@ LocalType = Union[ CmpKey = Tuple[ int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType ] -LegacyCmpKey = Tuple[int, Tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool -] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] _Version = collections.namedtuple( "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) -def parse(version: str) -> Union["LegacyVersion", "Version"]: +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. - """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + return Version(version) class InvalidVersion(ValueError): - """ - An invalid version was found, users should refer to PEP 440. + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' """ class _BaseVersion: - _key: Union[CmpKey, LegacyCmpKey] + _key: CmpKey def __hash__(self) -> int: return hash(self._key) @@ -103,126 +108,9 @@ class _BaseVersion: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue - - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: List[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch @@ -253,12 +141,55 @@ VERSION_PATTERN = r""" (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version """ +VERSION_PATTERN = _VERSION_PATTERN +""" +A string containing the regular expression used to match a valid version. + +The pattern is not anchored at either end, and is intended for embedding in larger +expressions (for example, matching a version number as part of a file name). The +regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE`` +flags set. + +:meta hide-value: +""" + class Version(_BaseVersion): + """This class abstracts handling of a project's versions. + + A :class:`Version` instance is comparison aware and can be compared and + sorted using the standard Python interfaces. + + >>> v1 = Version("1.0a5") + >>> v2 = Version("1.0") + >>> v1 + + >>> v2 + + >>> v1 < v2 + True + >>> v1 == v2 + False + >>> v1 > v2 + False + >>> v1 >= v2 + False + >>> v1 <= v2 + True + """ _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) def __init__(self, version: str) -> None: + """Initialize a Version object. + + :param version: + The string representation of a version which will be parsed and normalized + before use. + :raises InvalidVersion: + If the ``version`` does not conform to PEP 440 in any way then this + exception will be raised. + """ # Validate the version and parse it into pieces match = self._regex.search(version) @@ -288,9 +219,19 @@ class Version(_BaseVersion): ) def __repr__(self) -> str: + """A representation of the Version that shows all internal state. + + >>> Version('1.0.0') + + """ return f"" def __str__(self) -> str: + """A string representation of the version that can be rounded-tripped. + + >>> str(Version("1.0a5")) + '1.0a5' + """ parts = [] # Epoch @@ -320,29 +261,80 @@ class Version(_BaseVersion): @property def epoch(self) -> int: + """The epoch of the version. + + >>> Version("2.0.0").epoch + 0 + >>> Version("1!2.0.0").epoch + 1 + """ _epoch: int = self._version.epoch return _epoch @property def release(self) -> Tuple[int, ...]: + """The components of the "release" segment of the version. + + >>> Version("1.2.3").release + (1, 2, 3) + >>> Version("2.0.0").release + (2, 0, 0) + >>> Version("1!2.0.0.post0").release + (2, 0, 0) + + Includes trailing zeroes but not the epoch or any pre-release / development / + post-release suffixes. + """ _release: Tuple[int, ...] = self._version.release return _release @property def pre(self) -> Optional[Tuple[str, int]]: + """The pre-release segment of the version. + + >>> print(Version("1.2.3").pre) + None + >>> Version("1.2.3a1").pre + ('a', 1) + >>> Version("1.2.3b1").pre + ('b', 1) + >>> Version("1.2.3rc1").pre + ('rc', 1) + """ _pre: Optional[Tuple[str, int]] = self._version.pre return _pre @property def post(self) -> Optional[int]: + """The post-release number of the version. + + >>> print(Version("1.2.3").post) + None + >>> Version("1.2.3.post1").post + 1 + """ return self._version.post[1] if self._version.post else None @property def dev(self) -> Optional[int]: + """The development number of the version. + + >>> print(Version("1.2.3").dev) + None + >>> Version("1.2.3.dev1").dev + 1 + """ return self._version.dev[1] if self._version.dev else None @property def local(self) -> Optional[str]: + """The local version segment of the version. + + >>> print(Version("1.2.3").local) + None + >>> Version("1.2.3+abc").local + 'abc' + """ if self._version.local: return ".".join(str(x) for x in self._version.local) else: @@ -350,10 +342,31 @@ class Version(_BaseVersion): @property def public(self) -> str: + """The public portion of the version. + + >>> Version("1.2.3").public + '1.2.3' + >>> Version("1.2.3+abc").public + '1.2.3' + >>> Version("1.2.3+abc.dev1").public + '1.2.3' + """ return str(self).split("+", 1)[0] @property def base_version(self) -> str: + """The "base version" of the version. + + >>> Version("1.2.3").base_version + '1.2.3' + >>> Version("1.2.3+abc").base_version + '1.2.3' + >>> Version("1!1.2.3+abc.dev1").base_version + '1!1.2.3' + + The "base version" is the public version of the project without any pre or post + release markers. + """ parts = [] # Epoch @@ -367,26 +380,72 @@ class Version(_BaseVersion): @property def is_prerelease(self) -> bool: + """Whether this version is a pre-release. + + >>> Version("1.2.3").is_prerelease + False + >>> Version("1.2.3a1").is_prerelease + True + >>> Version("1.2.3b1").is_prerelease + True + >>> Version("1.2.3rc1").is_prerelease + True + >>> Version("1.2.3dev1").is_prerelease + True + """ return self.dev is not None or self.pre is not None @property def is_postrelease(self) -> bool: + """Whether this version is a post-release. + + >>> Version("1.2.3").is_postrelease + False + >>> Version("1.2.3.post1").is_postrelease + True + """ return self.post is not None @property def is_devrelease(self) -> bool: + """Whether this version is a development release. + + >>> Version("1.2.3").is_devrelease + False + >>> Version("1.2.3.dev1").is_devrelease + True + """ return self.dev is not None @property def major(self) -> int: + """The first item of :attr:`release` or ``0`` if unavailable. + + >>> Version("1.2.3").major + 1 + """ return self.release[0] if len(self.release) >= 1 else 0 @property def minor(self) -> int: + """The second item of :attr:`release` or ``0`` if unavailable. + + >>> Version("1.2.3").minor + 2 + >>> Version("1").minor + 0 + """ return self.release[1] if len(self.release) >= 2 else 0 @property def micro(self) -> int: + """The third item of :attr:`release` or ``0`` if unavailable. + + >>> Version("1.2.3").micro + 3 + >>> Version("1").micro + 0 + """ return self.release[2] if len(self.release) >= 3 else 0 diff --git a/requirements.txt b/requirements.txt index e31185a6..cffb49c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,7 +25,7 @@ IPy==1.01 Mako==1.2.4 MarkupSafe==2.1.1 musicbrainzngs==0.7.1 -packaging==21.3 +packaging==22.0 paho-mqtt==1.6.1 plexapi==4.13.1 portend==3.1.0