Bump pyparsing from 3.1.4 to 3.2.0 (#2437)

* Bump pyparsing from 3.1.4 to 3.2.0

Bumps [pyparsing](https://github.com/pyparsing/pyparsing) from 3.1.4 to 3.2.0.
- [Release notes](https://github.com/pyparsing/pyparsing/releases)
- [Changelog](https://github.com/pyparsing/pyparsing/blob/master/CHANGES)
- [Commits](https://github.com/pyparsing/pyparsing/compare/3.1.4...3.2.0)

---
updated-dependencies:
- dependency-name: pyparsing
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update pyparsing==3.2.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2024-11-19 10:00:11 -08:00 committed by GitHub
parent 2fe3f039cc
commit be2e63e7e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 624 additions and 373 deletions

View file

@ -120,8 +120,8 @@ class version_info(NamedTuple):
return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"
__version_info__ = version_info(3, 1, 4, "final", 1)
__version_time__ = "25 Aug 2024 14:40 UTC"
__version_info__ = version_info(3, 2, 0, "final", 1)
__version_time__ = "13 Oct 2024 09:46 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
@ -131,9 +131,9 @@ from .exceptions import *
from .actions import *
from .core import __diag__, __compat__
from .results import *
from .core import * # type: ignore[misc, assignment]
from .core import *
from .core import _builtin_exprs as core_builtin_exprs
from .helpers import * # type: ignore[misc, assignment]
from .helpers import *
from .helpers import _builtin_exprs as helper_builtin_exprs
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
@ -147,9 +147,9 @@ from .common import (
if "pyparsing_unicode" not in globals():
pyparsing_unicode = unicode # type: ignore[misc]
if "pyparsing_common" not in globals():
pyparsing_common = common # type: ignore[misc]
pyparsing_common = common
if "pyparsing_test" not in globals():
pyparsing_test = testing # type: ignore[misc]
pyparsing_test = testing
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
@ -208,6 +208,7 @@ __all__ = [
"StringEnd",
"StringStart",
"Suppress",
"Tag",
"Token",
"TokenConverter",
"White",

File diff suppressed because it is too large Load diff

View file

@ -1,20 +1,20 @@
# mypy: ignore-errors
from __future__ import annotations
import railroad
import pyparsing
import dataclasses
import typing
from typing import (
List,
NamedTuple,
Generic,
TypeVar,
Dict,
Callable,
Set,
Iterable,
)
from jinja2 import Template
from io import StringIO
import inspect
import re
jinja2_template_source = """\
@ -55,14 +55,23 @@ jinja2_template_source = """\
template = Template(jinja2_template_source)
# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
NamedDiagram = NamedTuple(
"NamedDiagram",
[("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""
def _collapse_verbose_regex(regex_str: str) -> str:
collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str)
collapsed = re.sub(r"\s*\n\s*", "", collapsed)
return collapsed
@dataclasses.dataclass
class NamedDiagram:
"""
A simple structure for associating a name with a railroad diagram
"""
name: str
index: int
diagram: railroad.DiagramItem = None
T = TypeVar("T")
@ -108,7 +117,7 @@ class EditablePartial(Generic[T]):
self.kwargs = kwargs
@classmethod
def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
def from_call(cls, func: Callable[..., T], *args, **kwargs) -> EditablePartial[T]:
"""
If you call this function in the same way that you would call the constructor, it will store the arguments
as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
@ -135,7 +144,7 @@ class EditablePartial(Generic[T]):
return self.func(*args, **kwargs)
def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str:
def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str:
"""
Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
:params kwargs: kwargs to be passed in to the template
@ -158,7 +167,7 @@ def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str
return template.render(diagrams=data, embed=embed, **kwargs)
def resolve_partial(partial: "EditablePartial[T]") -> T:
def resolve_partial(partial: EditablePartial[T]) -> T:
"""
Recursively resolves a collection of Partials into whatever type they are
"""
@ -180,7 +189,7 @@ def to_railroad(
vertical: int = 3,
show_results_names: bool = False,
show_groups: bool = False,
) -> List[NamedDiagram]:
) -> list[NamedDiagram]:
"""
Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
creation if you want to access the Railroad tree before it is converted to HTML
@ -244,40 +253,31 @@ def _should_vertical(
return len(_visible_exprs(exprs)) >= specification
@dataclasses.dataclass
class ElementState:
"""
State recorded for an individual pyparsing Element
"""
# Note: this should be a dataclass, but we have to support Python 3.5
def __init__(
self,
element: pyparsing.ParserElement,
converted: EditablePartial,
parent: EditablePartial,
number: int,
name: str = None,
parent_index: typing.Optional[int] = None,
):
#: The pyparsing element that this represents
self.element: pyparsing.ParserElement = element
#: The name of the element
self.name: typing.Optional[str] = name
element: pyparsing.ParserElement
#: The output Railroad element in an unconverted state
self.converted: EditablePartial = converted
converted: EditablePartial
#: The parent Railroad element, which we store so that we can extract this if it's duplicated
self.parent: EditablePartial = parent
parent: EditablePartial
#: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
self.number: int = number
number: int
#: The name of the element
name: str = None
#: The index of this inside its parent
self.parent_index: typing.Optional[int] = parent_index
parent_index: typing.Optional[int] = None
#: If true, we should extract this out into a subdiagram
self.extract: bool = False
extract: bool = False
#: If true, all of this element's children have been filled out
self.complete: bool = False
complete: bool = False
def mark_for_extraction(
self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
self, el_id: int, state: ConverterState, name: str = None, force: bool = False
):
"""
Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
@ -313,16 +313,16 @@ class ConverterState:
def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
#: A dictionary mapping ParserElements to state relating to them
self._element_diagram_states: Dict[int, ElementState] = {}
self._element_diagram_states: dict[int, ElementState] = {}
#: A dictionary mapping ParserElement IDs to subdiagrams generated from them
self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
self.diagrams: dict[int, EditablePartial[NamedDiagram]] = {}
#: The index of the next unnamed element
self.unnamed_index: int = 1
#: The index of the next element. This is used for sorting
self.index: int = 0
#: Shared kwargs that are used to customize the construction of diagrams
self.diagram_kwargs: dict = diagram_kwargs or {}
self.extracted_diagram_names: Set[str] = set()
self.extracted_diagram_names: set[str] = set()
def __setitem__(self, key: int, value: ElementState):
self._element_diagram_states[key] = value
@ -513,7 +513,7 @@ def _to_diagram_element(
# If the element isn't worth extracting, we always treat it as the first time we see it
if _worth_extracting(element):
if el_id in lookup:
if el_id in lookup and lookup[el_id].name is not None:
# If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
# so we have to extract it into a new diagram.
looked_up = lookup[el_id]
@ -618,6 +618,11 @@ def _to_diagram_element(
ret = EditablePartial.from_call(railroad.Sequence, items=[])
elif len(exprs) > 0 and not element_results_name:
ret = EditablePartial.from_call(railroad.Group, item="", label=name)
elif isinstance(element, pyparsing.Regex):
patt = _collapse_verbose_regex(element.pattern)
element.pattern = patt
element._defaultName = None
ret = EditablePartial.from_call(railroad.Terminal, element.defaultName)
elif len(exprs) > 0:
ret = EditablePartial.from_call(railroad.Sequence, items=[])
else:

View file

@ -1,17 +1,20 @@
# exceptions.py
from __future__ import annotations
import copy
import re
import sys
import typing
from functools import cached_property
from .unicode import pyparsing_unicode as ppu
from .util import (
_collapse_string_to_ranges,
col,
line,
lineno,
_collapse_string_to_ranges,
replaced_by_pep8,
)
from .unicode import pyparsing_unicode as ppu
class _ExceptionWordUnicodeSet(
@ -31,7 +34,7 @@ class ParseBaseException(Exception):
msg: str
pstr: str
parser_element: typing.Any # "ParserElement"
args: typing.Tuple[str, int, typing.Optional[str]]
args: tuple[str, int, typing.Optional[str]]
__slots__ = (
"loc",
@ -50,18 +53,17 @@ class ParseBaseException(Exception):
msg: typing.Optional[str] = None,
elem=None,
):
self.loc = loc
if msg is None:
self.msg = pstr
self.pstr = ""
else:
msg, pstr = pstr, ""
self.loc = loc
self.msg = msg
self.pstr = pstr
self.parser_element = elem
self.args = (pstr, loc, msg)
@staticmethod
def explain_exception(exc, depth=16):
def explain_exception(exc: Exception, depth: int = 16) -> str:
"""
Method to take an exception and translate the Python internal traceback into a list
of the pyparsing expressions that caused the exception to be raised.
@ -82,17 +84,17 @@ class ParseBaseException(Exception):
if depth is None:
depth = sys.getrecursionlimit()
ret = []
ret: list[str] = []
if isinstance(exc, ParseBaseException):
ret.append(exc.line)
ret.append(f"{' ' * (exc.column - 1)}^")
ret.append(f"{type(exc).__name__}: {exc}")
if depth <= 0:
if depth <= 0 or exc.__traceback__ is None:
return "\n".join(ret)
callers = inspect.getinnerframes(exc.__traceback__, context=depth)
seen = set()
seen: set[int] = set()
for ff in callers[-depth:]:
frm = ff[0]
@ -125,41 +127,58 @@ class ParseBaseException(Exception):
return "\n".join(ret)
@classmethod
def _from_exception(cls, pe):
def _from_exception(cls, pe) -> ParseBaseException:
"""
internal factory method to simplify creating one type of ParseException
from another - avoids having __init__ signature conflicts among subclasses
"""
return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element)
@property
@cached_property
def line(self) -> str:
"""
Return the line of text where the exception occurred.
"""
return line(self.loc, self.pstr)
@property
@cached_property
def lineno(self) -> int:
"""
Return the 1-based line number of text where the exception occurred.
"""
return lineno(self.loc, self.pstr)
@property
@cached_property
def col(self) -> int:
"""
Return the 1-based column on the line of text where the exception occurred.
"""
return col(self.loc, self.pstr)
@property
@cached_property
def column(self) -> int:
"""
Return the 1-based column on the line of text where the exception occurred.
"""
return col(self.loc, self.pstr)
@cached_property
def found(self) -> str:
if not self.pstr:
return ""
if self.loc >= len(self.pstr):
return "end of text"
# pull out next word at error location
found_match = _exception_word_extractor.match(self.pstr, self.loc)
if found_match is not None:
found_text = found_match.group(0)
else:
found_text = self.pstr[self.loc : self.loc + 1]
return repr(found_text).replace(r"\\", "\\")
# pre-PEP8 compatibility
@property
def parserElement(self):
@ -169,21 +188,15 @@ class ParseBaseException(Exception):
def parserElement(self, elem):
self.parser_element = elem
def copy(self):
return copy.copy(self)
def formatted_message(self) -> str:
found_phrase = f", found {self.found}" if self.found else ""
return f"{self.msg}{found_phrase} (at char {self.loc}), (line:{self.lineno}, col:{self.column})"
def __str__(self) -> str:
if self.pstr:
if self.loc >= len(self.pstr):
foundstr = ", found end of text"
else:
# pull out next word at error location
found_match = _exception_word_extractor.match(self.pstr, self.loc)
if found_match is not None:
found = found_match.group(0)
else:
found = self.pstr[self.loc : self.loc + 1]
foundstr = (", found %r" % found).replace(r"\\", "\\")
else:
foundstr = ""
return f"{self.msg}{foundstr} (at char {self.loc}), (line:{self.lineno}, col:{self.column})"
return self.formatted_message()
def __repr__(self):
return str(self)
@ -199,12 +212,10 @@ class ParseBaseException(Exception):
line_str = self.line
line_column = self.column - 1
if markerString:
line_str = "".join(
(line_str[:line_column], markerString, line_str[line_column:])
)
line_str = f"{line_str[:line_column]}{markerString}{line_str[line_column:]}"
return line_str.strip()
def explain(self, depth=16) -> str:
def explain(self, depth: int = 16) -> str:
"""
Method to translate the Python internal traceback into a list
of the pyparsing expressions that caused the exception to be raised.
@ -292,6 +303,8 @@ class RecursiveGrammarException(Exception):
Exception thrown by :class:`ParserElement.validate` if the
grammar could be left-recursive; parser may need to enable
left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
Deprecated: only used by deprecated method ParserElement.validate.
"""
def __init__(self, parseElementList):

View file

@ -1,5 +1,6 @@
# helpers.py
import html.entities
import operator
import re
import sys
import typing
@ -10,6 +11,7 @@ from .util import (
_bslash,
_flatten,
_escape_regex_range_chars,
make_compressed_re,
replaced_by_pep8,
)
@ -203,15 +205,15 @@ def one_of(
)
if caseless:
isequal = lambda a, b: a.upper() == b.upper()
is_equal = lambda a, b: a.upper() == b.upper()
masks = lambda a, b: b.upper().startswith(a.upper())
parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral
else:
isequal = lambda a, b: a == b
is_equal = operator.eq
masks = lambda a, b: b.startswith(a)
parseElementClass = Keyword if asKeyword else Literal
parse_element_class = Keyword if asKeyword else Literal
symbols: List[str] = []
symbols: list[str]
if isinstance(strs, str_type):
strs = typing.cast(str, strs)
symbols = strs.split()
@ -224,15 +226,14 @@ def one_of(
# reorder given symbols to take care to avoid masking longer choices with shorter ones
# (but only if the given symbols are not just single characters)
if any(len(sym) > 1 for sym in symbols):
i = 0
while i < len(symbols) - 1:
cur = symbols[i]
for j, other in enumerate(symbols[i + 1 :]):
if isequal(other, cur):
if is_equal(other, cur):
del symbols[i + j + 1]
break
if masks(cur, other):
if len(other) > len(cur) and masks(cur, other):
del symbols[i + j + 1]
symbols.insert(i, other)
break
@ -269,7 +270,7 @@ def one_of(
)
# last resort, just use MatchFirst
return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
" | ".join(symbols)
)
@ -602,7 +603,7 @@ def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">"))
def make_html_tags(
tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
) -> tuple[ParserElement, ParserElement]:
"""Helper to construct opening and closing tag expressions for HTML,
given a tag name. Matches tags in either upper or lower case,
attributes with namespaces and with quoted or unquoted values.
@ -629,7 +630,7 @@ def make_html_tags(
def make_xml_tags(
tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
) -> tuple[ParserElement, ParserElement]:
"""Helper to construct opening and closing tag expressions for XML,
given a tag name. Matches tags only in the given upper/lower case.
@ -645,9 +646,12 @@ any_open_tag, any_close_tag = make_html_tags(
)
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
"common HTML entity"
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
" ", "|"
)
common_html_entity = Regex(
lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
def replace_html_entity(s, l, t):
@ -664,16 +668,16 @@ class OpAssoc(Enum):
InfixNotationOperatorArgType = Union[
ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
InfixNotationOperatorSpec = Union[
Tuple[
tuple[
InfixNotationOperatorArgType,
int,
OpAssoc,
typing.Optional[ParseAction],
],
Tuple[
tuple[
InfixNotationOperatorArgType,
int,
OpAssoc,
@ -683,7 +687,7 @@ InfixNotationOperatorSpec = Union[
def infix_notation(
base_expr: ParserElement,
op_list: List[InfixNotationOperatorSpec],
op_list: list[InfixNotationOperatorSpec],
lpar: Union[str, ParserElement] = Suppress("("),
rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
@ -1032,7 +1036,7 @@ python_style_comment = Regex(r"#.*").set_name("Python style comment")
# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
_builtin_exprs: list[ParserElement] = [
v for v in vars().values() if isinstance(v, ParserElement)
]

View file

@ -1,4 +1,7 @@
# results.py
from __future__ import annotations
import collections
from collections.abc import (
MutableMapping,
Mapping,
@ -7,21 +10,21 @@ from collections.abc import (
Iterable,
)
import pprint
from typing import Tuple, Any, Dict, Set, List
from typing import Any
from .util import replaced_by_pep8
str_type: Tuple[type, ...] = (str, bytes)
str_type: tuple[type, ...] = (str, bytes)
_generator_type = type((_ for _ in ()))
class _ParseResultsWithOffset:
tup: Tuple["ParseResults", int]
tup: tuple[ParseResults, int]
__slots__ = ["tup"]
def __init__(self, p1: "ParseResults", p2: int):
self.tup: Tuple[ParseResults, int] = (p1, p2)
def __init__(self, p1: ParseResults, p2: int):
self.tup: tuple[ParseResults, int] = (p1, p2)
def __getitem__(self, i):
return self.tup[i]
@ -79,14 +82,14 @@ class ParseResults:
- year: '1999'
"""
_null_values: Tuple[Any, ...] = (None, [], ())
_null_values: tuple[Any, ...] = (None, [], ())
_name: str
_parent: "ParseResults"
_all_names: Set[str]
_parent: ParseResults
_all_names: set[str]
_modal: bool
_toklist: List[Any]
_tokdict: Dict[str, Any]
_toklist: list[Any]
_tokdict: dict[str, Any]
__slots__ = (
"_name",
@ -172,8 +175,8 @@ class ParseResults:
# constructor as small and fast as possible
def __init__(
self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
):
self._tokdict: Dict[str, _ParseResultsWithOffset]
) -> None:
self._tokdict: dict[str, _ParseResultsWithOffset]
self._modal = modal
if name is None or name == "":
@ -226,7 +229,7 @@ class ParseResults:
self._toklist[k] = v
sub = v
else:
self._tokdict[k] = self._tokdict.get(k, list()) + [
self._tokdict[k] = self._tokdict.get(k, []) + [
_ParseResultsWithOffset(v, 0)
]
sub = v
@ -443,12 +446,12 @@ class ParseResults:
raise AttributeError(name)
return ""
def __add__(self, other: "ParseResults") -> "ParseResults":
def __add__(self, other: ParseResults) -> ParseResults:
ret = self.copy()
ret += other
return ret
def __iadd__(self, other: "ParseResults") -> "ParseResults":
def __iadd__(self, other: ParseResults) -> ParseResults:
if not other:
return self
@ -470,7 +473,7 @@ class ParseResults:
self._all_names |= other._all_names
return self
def __radd__(self, other) -> "ParseResults":
def __radd__(self, other) -> ParseResults:
if isinstance(other, int) and other == 0:
# useful for merging many ParseResults using sum() builtin
return self.copy()
@ -504,9 +507,10 @@ class ParseResults:
out.append(str(item))
return out
def as_list(self) -> list:
def as_list(self, *, flatten: bool = False) -> list:
"""
Returns the parse results as a nested list of matching tokens, all converted to strings.
If flatten is True, all the nesting levels in the returned list are collapsed.
Example::
@ -519,6 +523,18 @@ class ParseResults:
result_list = result.as_list()
print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
"""
def flattened(pr):
to_visit = collections.deque([*self])
while to_visit:
to_do = to_visit.popleft()
if isinstance(to_do, ParseResults):
to_visit.extendleft(to_do[::-1])
else:
yield to_do
if flatten:
return [*flattened(self)]
else:
return [
res.as_list() if isinstance(res, ParseResults) else res
for res in self._toklist
@ -553,7 +569,7 @@ class ParseResults:
return dict((k, to_item(v)) for k, v in self.items())
def copy(self) -> "ParseResults":
def copy(self) -> ParseResults:
"""
Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
items contained within the source are shared with the copy. Use
@ -567,7 +583,7 @@ class ParseResults:
ret._name = self._name
return ret
def deepcopy(self) -> "ParseResults":
def deepcopy(self) -> ParseResults:
"""
Returns a new deep copy of a :class:`ParseResults` object.
"""
@ -584,11 +600,11 @@ class ParseResults:
dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
elif isinstance(obj, Iterable):
ret._toklist[i] = type(obj)(
v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
v.deepcopy() if isinstance(v, ParseResults) else v for v in obj # type: ignore[call-arg]
)
return ret
def get_name(self) -> str:
def get_name(self) -> str | None:
r"""
Returns the results name for this token expression. Useful when several
different expressions might match at a particular location.
@ -616,7 +632,7 @@ class ParseResults:
if self._name:
return self._name
elif self._parent:
par: "ParseResults" = self._parent
par: ParseResults = self._parent
parent_tokdict_items = par._tokdict.items()
return next(
(
@ -761,7 +777,7 @@ class ParseResults:
return dir(type(self)) + list(self.keys())
@classmethod
def from_dict(cls, other, name=None) -> "ParseResults":
def from_dict(cls, other, name=None) -> ParseResults:
"""
Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
name-value relations as results names. If an optional ``name`` argument is

View file

@ -257,10 +257,14 @@ class pyparsing_test:
eol_mark: str = "|",
mark_spaces: typing.Optional[str] = None,
mark_control: typing.Optional[str] = None,
*,
indent: typing.Union[str, int] = "",
base_1: bool = True,
) -> str:
"""
Helpful method for debugging a parser - prints a string with line and column numbers.
(Line and column numbers are 1-based.)
(Line and column numbers are 1-based by default - if debugging a parse action,
pass base_1=False, to correspond to the loc value passed to the parse action.)
:param s: str - string to be printed with line and column numbers
:param start_line: int - (optional) starting line number in s to print (default=1)
@ -273,11 +277,18 @@ class pyparsing_test:
- "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
- any single character string - replace control characters with given string
- None (default) - string is displayed as-is
:param indent: str | int - (optional) string to indent with line and column numbers; if an int
is passed, converted to " " * indent
:param base_1: bool - (optional) whether to label string using base 1; if False, string will be
labeled based at 0 (default=True)
:return: str - input string with leading line numbers and column number headers
"""
if expand_tabs:
s = s.expandtabs()
if isinstance(indent, int):
indent = " " * indent
indent = indent.expandtabs()
if mark_control is not None:
mark_control = typing.cast(str, mark_control)
if mark_control == "unicode":
@ -300,46 +311,52 @@ class pyparsing_test:
else:
s = s.replace(" ", mark_spaces)
if start_line is None:
start_line = 1
start_line = 0
if end_line is None:
end_line = len(s)
end_line = min(end_line, len(s))
start_line = min(max(1, start_line), end_line)
start_line = min(max(0, start_line), end_line)
if mark_control != "unicode":
s_lines = s.splitlines()[start_line - 1 : end_line]
s_lines = s.splitlines()[start_line - base_1 : end_line]
else:
s_lines = [line + "" for line in s.split("")[start_line - 1 : end_line]]
s_lines = [
line + "" for line in s.split("")[start_line - base_1 : end_line]
]
if not s_lines:
return ""
lineno_width = len(str(end_line))
max_line_len = max(len(line) for line in s_lines)
lead = " " * (lineno_width + 1)
lead = indent + " " * (lineno_width + 1)
if max_line_len >= 99:
header0 = (
lead
+ ("" if base_1 else " ")
+ "".join(
f"{' ' * 99}{(i + 1) % 100}"
for i in range(max(max_line_len // 100, 1))
for i in range(1 if base_1 else 0, max(max_line_len // 100, 1))
)
+ "\n"
)
else:
header0 = ""
header1 = (
header0
("" if base_1 else " ")
+ lead
+ "".join(f" {(i + 1) % 10}" for i in range(-(-max_line_len // 10)))
+ "\n"
)
header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
digits = "1234567890"
header2 = (
lead + ("" if base_1 else "0") + digits * (-(-max_line_len // 10)) + "\n"
)
return (
header1
+ header2
+ "\n".join(
f"{i:{lineno_width}d}:{line}{eol_mark}"
for i, line in enumerate(s_lines, start=start_line)
f"{indent}{i:{lineno_width}d}:{line}{eol_mark}"
for i, line in enumerate(s_lines, start=start_line + base_1)
)
+ "\n"
)

View file

@ -2,7 +2,7 @@
import sys
from itertools import filterfalse
from typing import List, Tuple, Union
from typing import Union
class _lazyclassproperty:
@ -25,7 +25,7 @@ class _lazyclassproperty:
return cls._intern[attrname]
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
UnicodeRangeList = list[Union[tuple[int, int], tuple[int]]]
class unicode_set:
@ -53,9 +53,9 @@ class unicode_set:
_ranges: UnicodeRangeList = []
@_lazyclassproperty
def _chars_for_ranges(cls) -> List[str]:
ret: List[int] = []
for cc in cls.__mro__:
def _chars_for_ranges(cls) -> list[str]:
ret: list[int] = []
for cc in cls.__mro__: # type: ignore[attr-defined]
if cc is unicode_set:
break
for rr in getattr(cc, "_ranges", ()):

View file

@ -1,11 +1,11 @@
# util.py
import inspect
import warnings
import types
import collections
import itertools
import contextlib
from functools import lru_cache, wraps
from typing import Callable, List, Union, Iterable, TypeVar, cast
import inspect
import itertools
import types
from typing import Callable, Union, Iterable, TypeVar, cast
import warnings
_bslash = chr(92)
C = TypeVar("C", bound=Callable)
@ -14,8 +14,8 @@ C = TypeVar("C", bound=Callable)
class __config_flags:
"""Internal class for defining compatibility and debugging flags"""
_all_names: List[str] = []
_fixed_names: List[str] = []
_all_names: list[str] = []
_fixed_names: list[str] = []
_type_desc = "configuration"
@classmethod
@ -100,27 +100,24 @@ class _UnboundedCache:
class _FifoCache:
def __init__(self, size):
self.not_in_cache = not_in_cache = object()
cache = {}
keyring = [object()] * size
self.size = size
self.not_in_cache = not_in_cache = object()
cache_get = cache.get
cache_pop = cache.pop
keyiter = itertools.cycle(range(size))
def get(_, key):
return cache_get(key, not_in_cache)
def set_(_, key, value):
cache[key] = value
i = next(keyiter)
cache_pop(keyring[i], None)
keyring[i] = key
while len(cache) > size:
# pop oldest element in cache by getting the first key
cache_pop(next(iter(cache)))
def clear(_):
cache.clear()
keyring[:] = [object()] * size
self.size = size
self.get = types.MethodType(get, self)
self.set = types.MethodType(set_, self)
self.clear = types.MethodType(clear, self)
@ -137,13 +134,13 @@ class LRUMemo:
def __init__(self, capacity):
self._capacity = capacity
self._active = {}
self._memory = collections.OrderedDict()
self._memory = {}
def __getitem__(self, key):
try:
return self._active[key]
except KeyError:
self._memory.move_to_end(key)
self._memory[key] = self._memory.pop(key)
return self._memory[key]
def __setitem__(self, key, value):
@ -156,8 +153,9 @@ class LRUMemo:
except KeyError:
pass
else:
while len(self._memory) >= self._capacity:
self._memory.popitem(last=False)
oldest_keys = list(self._memory)[: -(self._capacity + 1)]
for key_to_delete in oldest_keys:
self._memory.pop(key_to_delete)
self._memory[key] = value
def clear(self):
@ -183,60 +181,182 @@ def _escape_regex_range_chars(s: str) -> str:
return str(s)
class _GroupConsecutive:
"""
Used as a callable `key` for itertools.groupby to group
characters that are consecutive:
itertools.groupby("abcdejkmpqrs", key=_GroupConsecutive())
yields:
(0, iter(['a', 'b', 'c', 'd', 'e']))
(1, iter(['j', 'k']))
(2, iter(['m']))
(3, iter(['p', 'q', 'r', 's']))
"""
def __init__(self):
self.prev = 0
self.counter = itertools.count()
self.value = -1
def __call__(self, char: str) -> int:
c_int = ord(char)
self.prev, prev = c_int, self.prev
if c_int - prev > 1:
self.value = next(self.counter)
return self.value
def _collapse_string_to_ranges(
s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
def is_consecutive(c):
c_int = ord(c)
is_consecutive.prev, prev = c_int, is_consecutive.prev
if c_int - prev > 1:
is_consecutive.value = next(is_consecutive.counter)
return is_consecutive.value
r"""
Take a string or list of single-character strings, and return
a string of the consecutive characters in that string collapsed
into groups, as might be used in a regular expression '[a-z]'
character set:
'a' -> 'a' -> '[a]'
'bc' -> 'bc' -> '[bc]'
'defgh' -> 'd-h' -> '[d-h]'
'fdgeh' -> 'd-h' -> '[d-h]'
'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'
Duplicates get collapsed out:
'aaa' -> 'a' -> '[a]'
'bcbccb' -> 'bc' -> '[bc]'
'defghhgf' -> 'd-h' -> '[d-h]'
'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'
Spaces are preserved:
'ab c' -> ' a-c' -> '[ a-c]'
Characters that are significant when defining regex ranges
get escaped:
'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
"""
is_consecutive.prev = 0 # type: ignore [attr-defined]
is_consecutive.counter = itertools.count() # type: ignore [attr-defined]
is_consecutive.value = -1 # type: ignore [attr-defined]
# Developer notes:
# - Do not optimize this code assuming that the given input string
# or internal lists will be short (such as in loading generators into
# lists to make it easier to find the last element); this method is also
# used to generate regex ranges for character sets in the pyparsing.unicode
# classes, and these can be _very_ long lists of strings
def escape_re_range_char(c):
def escape_re_range_char(c: str) -> str:
return "\\" + c if c in r"\^-][" else c
def no_escape_re_range_char(c):
def no_escape_re_range_char(c: str) -> str:
return c
if not re_escape:
escape_re_range_char = no_escape_re_range_char
ret = []
s = "".join(sorted(set(s)))
if len(s) > 3:
for _, chars in itertools.groupby(s, key=is_consecutive):
# reduce input string to remove duplicates, and put in sorted order
s_chars: list[str] = sorted(set(s))
if len(s_chars) > 2:
# find groups of characters that are consecutive (can be collapsed
# down to "<first>-<last>")
for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
# _ is unimportant, is just used to identify groups
# chars is an iterator of one or more consecutive characters
# that comprise the current group
first = last = next(chars)
last = collections.deque(
itertools.chain(iter([last]), chars), maxlen=1
).pop()
with contextlib.suppress(ValueError):
*_, last = chars
if first == last:
# there was only a single char in this group
ret.append(escape_re_range_char(first))
elif last == chr(ord(first) + 1):
# there were only 2 characters in this group
# 'a','b' -> 'ab'
ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")
else:
sep = "" if ord(last) == ord(first) + 1 else "-"
# there were > 2 characters in this group, make into a range
# 'c','d','e' -> 'c-e'
ret.append(
f"{escape_re_range_char(first)}{sep}{escape_re_range_char(last)}"
f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
)
else:
ret = [escape_re_range_char(c) for c in s]
# only 1 or 2 chars were given to form into groups
# 'a' -> ['a']
# 'bc' -> ['b', 'c']
# 'dg' -> ['d', 'g']
# no need to list them with "-", just return as a list
# (after escaping)
ret = [escape_re_range_char(c) for c in s_chars]
return "".join(ret)
def _flatten(ll: list) -> list:
def _flatten(ll: Iterable) -> list:
ret = []
for i in ll:
if isinstance(i, list):
ret.extend(_flatten(i))
to_visit = [*ll]
while to_visit:
i = to_visit.pop(0)
if isinstance(i, Iterable) and not isinstance(i, str):
to_visit[:0] = i
else:
ret.append(i)
return ret
def make_compressed_re(
    word_list: Iterable[str], max_level: int = 2, _level: int = 1
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    Every literal character taken from the words is regex-escaped, so words that
    contain characters such as ``.``, ``+`` or ``(`` match only themselves.
    Duplicate words are ignored.

    :param word_list: the words to match; any iterable of non-empty strings
    :param max_level: maximum depth of nested groups to generate; ``0`` disables
      prefix collapsing and returns a plain alternation, longest word first
    :param _level: current nesting depth, used by the recursive calls
    :return: a regular expression string matching exactly the given words
    :raises ValueError: if no words are given, or if a word is the empty string
    """
    # imported locally so that this function does not depend on a module-level import
    import re

    # De-duplicate (keeping first-seen order). A repeated word would otherwise
    # leave a stray "" suffix behind and turn its prefix into an optional match.
    words = list(dict.fromkeys(word_list))
    if not words:
        raise ValueError("no words given to make_compressed_re()")
    if "" in words:
        # an empty alternative would make the whole expression match empty text
        raise ValueError("word list cannot contain an empty string")

    if max_level == 0:
        # longest first, so a short word cannot mask a longer one that it prefixes
        return "|".join(re.escape(w) for w in sorted(words, key=len, reverse=True))

    def get_suffixes_from_common_prefixes(namelist: list):
        # namelist must be sorted so that groupby sees each first letter only once
        for prefix, group in itertools.groupby(namelist, key=lambda s: s[:1]):
            yield prefix, sorted((s[1:] for s in group), key=len, reverse=True)

    ret = []
    sep = ""
    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(words)):
        ret.append(sep)
        sep = "|"
        initial = re.escape(initial)

        # a "" suffix means the prefix alone is a word: whatever follows is optional
        trailing = ""
        if "" in suffixes:
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                # all suffixes are single characters: emit a character class
                char_class = "".join(re.escape(s) for s in suffixes)
                ret.append(f"{initial}[{char_class}]{trailing}")
            elif _level < max_level:
                # still allowed to nest: compress the suffixes recursively
                suffix_re = make_compressed_re(
                    sorted(suffixes), max_level, _level + 1
                )
                ret.append(f"{initial}({suffix_re}){trailing}")
            else:
                # nesting limit reached: plain alternation, longest first
                suffixes.sort(key=len, reverse=True)
                alternatives = "|".join(re.escape(s) for s in suffixes)
                ret.append(f"{initial}({alternatives}){trailing}")
        elif suffixes:
            suffix = suffixes[0]
            escaped_suffix = re.escape(suffix)
            if len(suffix) > 1 and trailing:
                # group a multi-character suffix so that "?" applies to all of it
                ret.append(f"{initial}({escaped_suffix}){trailing}")
            else:
                ret.append(f"{initial}{escaped_suffix}{trailing}")
        else:
            ret.append(initial)
    return "".join(ret)
def replaced_by_pep8(compat_name: str, fn: C) -> C:
# In a future version, uncomment the code in the internal _inner() functions
# to begin emitting DeprecationWarnings.
@ -268,10 +388,10 @@ def replaced_by_pep8(compat_name: str, fn: C) -> C:
_inner.__name__ = compat_name
_inner.__annotations__ = fn.__annotations__
if isinstance(fn, types.FunctionType):
_inner.__kwdefaults__ = fn.__kwdefaults__
_inner.__kwdefaults__ = fn.__kwdefaults__ # type: ignore [attr-defined]
elif isinstance(fn, type) and hasattr(fn, "__init__"):
_inner.__kwdefaults__ = fn.__init__.__kwdefaults__
_inner.__kwdefaults__ = fn.__init__.__kwdefaults__ # type: ignore [misc,attr-defined]
else:
_inner.__kwdefaults__ = None
_inner.__kwdefaults__ = None # type: ignore [attr-defined]
_inner.__qualname__ = fn.__qualname__
return cast(C, _inner)

View file

@ -29,7 +29,7 @@ plexapi==4.15.16
portend==3.2.0
profilehooks==1.13.0
PyJWT==2.9.0
pyparsing==3.1.4
pyparsing==3.2.0
python-dateutil==2.9.0.post0
python-twitter==3.5
pytz==2024.2