Bump pyparsing from 3.1.4 to 3.2.0 (#2437)

* Bump pyparsing from 3.1.4 to 3.2.0

Bumps [pyparsing](https://github.com/pyparsing/pyparsing) from 3.1.4 to 3.2.0.
- [Release notes](https://github.com/pyparsing/pyparsing/releases)
- [Changelog](https://github.com/pyparsing/pyparsing/blob/master/CHANGES)
- [Commits](https://github.com/pyparsing/pyparsing/compare/3.1.4...3.2.0)

---
updated-dependencies:
- dependency-name: pyparsing
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update pyparsing==3.2.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
2025-07-05 12:45:47 -07:00 · 2024-11-19 10:00:11 -08:00 · 2024-11-19 10:00:11 -08:00 · be2e63e7e0
commit be2e63e7e0
parent 2fe3f039cc
10 changed files with 624 additions and 373 deletions
--- a/lib/pyparsing/__init__.py
+++ b/lib/pyparsing/__init__.py
@@ -120,8 +120,8 @@ class version_info(NamedTuple):
        return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"


-__version_info__ = version_info(3, 1, 4, "final", 1)
-__version_time__ = "25 Aug 2024 14:40 UTC"
+__version_info__ = version_info(3, 2, 0, "final", 1)
+__version_time__ = "13 Oct 2024 09:46 UTC"
 __version__ = __version_info__.__version__
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
@@ -131,9 +131,9 @@ from .exceptions import *
 from .actions import *
 from .core import __diag__, __compat__
 from .results import *
-from .core import *  # type: ignore[misc, assignment]
+from .core import *
 from .core import _builtin_exprs as core_builtin_exprs
-from .helpers import *  # type: ignore[misc, assignment]
+from .helpers import *
 from .helpers import _builtin_exprs as helper_builtin_exprs

 from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
@@ -147,9 +147,9 @@ from .common import (
 if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode  # type: ignore[misc]
 if "pyparsing_common" not in globals():
-    pyparsing_common = common  # type: ignore[misc]
+    pyparsing_common = common
 if "pyparsing_test" not in globals():
-    pyparsing_test = testing  # type: ignore[misc]
+    pyparsing_test = testing

 core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs

@@ -208,6 +208,7 @@ __all__ = [
    "StringEnd",
    "StringStart",
    "Suppress",
+    "Tag",
    "Token",
    "TokenConverter",
    "White",
--- a/lib/pyparsing/core.py
+++ b/lib/pyparsing/core.py
--- a/lib/pyparsing/diagram/__init__.py
+++ b/lib/pyparsing/diagram/__init__.py
@@ -1,20 +1,20 @@
 # mypy: ignore-errors
+from __future__ import annotations
+
 import railroad
 import pyparsing
+import dataclasses
 import typing
 from typing import (
-    List,
-    NamedTuple,
    Generic,
    TypeVar,
-    Dict,
    Callable,
-    Set,
    Iterable,
 )
 from jinja2 import Template
 from io import StringIO
 import inspect
+import re


 jinja2_template_source = """\
@@ -55,14 +55,23 @@ jinja2_template_source = """\

 template = Template(jinja2_template_source)

-# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
-NamedDiagram = NamedTuple(
-    "NamedDiagram",
-    [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)],
-)
-"""
-A simple structure for associating a name with a railroad diagram
-"""
+
+def _collapse_verbose_regex(regex_str: str) -> str:
+    collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str)
+    collapsed = re.sub(r"\s*\n\s*", "", collapsed)
+    return collapsed
+
+
+@dataclasses.dataclass
+class NamedDiagram:
+    """
+    A simple structure for associating a name with a railroad diagram
+    """
+
+    name: str
+    index: int
+    diagram: railroad.DiagramItem = None
+

 T = TypeVar("T")

@@ -108,7 +117,7 @@ class EditablePartial(Generic[T]):
        self.kwargs = kwargs

    @classmethod
-    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
+    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> EditablePartial[T]:
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
@@ -135,7 +144,7 @@ class EditablePartial(Generic[T]):
        return self.func(*args, **kwargs)


-def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str:
+def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
    :params kwargs: kwargs to be passed in to the template
@@ -158,7 +167,7 @@ def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str
    return template.render(diagrams=data, embed=embed, **kwargs)


-def resolve_partial(partial: "EditablePartial[T]") -> T:
+def resolve_partial(partial: EditablePartial[T]) -> T:
    """
    Recursively resolves a collection of Partials into whatever type they are
    """
@@ -180,7 +189,7 @@ def to_railroad(
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
-) -> List[NamedDiagram]:
+) -> list[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
    creation if you want to access the Railroad tree before it is converted to HTML
@@ -244,40 +253,31 @@ def _should_vertical(
        return len(_visible_exprs(exprs)) >= specification


+@dataclasses.dataclass
 class ElementState:
    """
    State recorded for an individual pyparsing Element
    """

-    # Note: this should be a dataclass, but we have to support Python 3.5
-    def __init__(
-        self,
-        element: pyparsing.ParserElement,
-        converted: EditablePartial,
-        parent: EditablePartial,
-        number: int,
-        name: str = None,
-        parent_index: typing.Optional[int] = None,
-    ):
-        #: The pyparsing element that this represents
-        self.element: pyparsing.ParserElement = element
-        #: The name of the element
-        self.name: typing.Optional[str] = name
-        #: The output Railroad element in an unconverted state
-        self.converted: EditablePartial = converted
-        #: The parent Railroad element, which we store so that we can extract this if it's duplicated
-        self.parent: EditablePartial = parent
-        #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
-        self.number: int = number
-        #: The index of this inside its parent
-        self.parent_index: typing.Optional[int] = parent_index
-        #: If true, we should extract this out into a subdiagram
-        self.extract: bool = False
-        #: If true, all of this element's children have been filled out
-        self.complete: bool = False
+    #: The pyparsing element that this represents
+    element: pyparsing.ParserElement
+    #: The output Railroad element in an unconverted state
+    converted: EditablePartial
+    #: The parent Railroad element, which we store so that we can extract this if it's duplicated
+    parent: EditablePartial
+    #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
+    number: int
+    #: The name of the element
+    name: str = None
+    #: The index of this inside its parent
+    parent_index: typing.Optional[int] = None
+    #: If true, we should extract this out into a subdiagram
+    extract: bool = False
+    #: If true, all of this element's children have been filled out
+    complete: bool = False

    def mark_for_extraction(
-        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
+        self, el_id: int, state: ConverterState, name: str = None, force: bool = False
    ):
        """
        Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
@@ -313,16 +313,16 @@ class ConverterState:

    def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
        #: A dictionary mapping ParserElements to state relating to them
-        self._element_diagram_states: Dict[int, ElementState] = {}
+        self._element_diagram_states: dict[int, ElementState] = {}
        #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
-        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
+        self.diagrams: dict[int, EditablePartial[NamedDiagram]] = {}
        #: The index of the next unnamed element
        self.unnamed_index: int = 1
        #: The index of the next element. This is used for sorting
        self.index: int = 0
        #: Shared kwargs that are used to customize the construction of diagrams
        self.diagram_kwargs: dict = diagram_kwargs or {}
-        self.extracted_diagram_names: Set[str] = set()
+        self.extracted_diagram_names: set[str] = set()

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value
@@ -513,7 +513,7 @@ def _to_diagram_element(

    # If the element isn't worth extracting, we always treat it as the first time we say it
    if _worth_extracting(element):
-        if el_id in lookup:
+        if el_id in lookup and lookup[el_id].name is not None:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
@@ -618,6 +618,11 @@ def _to_diagram_element(
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
+    elif isinstance(element, pyparsing.Regex):
+        patt = _collapse_verbose_regex(element.pattern)
+        element.pattern = patt
+        element._defaultName = None
+        ret = EditablePartial.from_call(railroad.Terminal, element.defaultName)
    elif len(exprs) > 0:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    else:
--- a/lib/pyparsing/exceptions.py
+++ b/lib/pyparsing/exceptions.py
@@ -1,17 +1,20 @@
 # exceptions.py
+from __future__ import annotations

+import copy
 import re
 import sys
 import typing
+from functools import cached_property

+from .unicode import pyparsing_unicode as ppu
 from .util import (
+    _collapse_string_to_ranges,
    col,
    line,
    lineno,
-    _collapse_string_to_ranges,
    replaced_by_pep8,
 )
-from .unicode import pyparsing_unicode as ppu


 class _ExceptionWordUnicodeSet(
@@ -31,7 +34,7 @@ class ParseBaseException(Exception):
    msg: str
    pstr: str
    parser_element: typing.Any  # "ParserElement"
-    args: typing.Tuple[str, int, typing.Optional[str]]
+    args: tuple[str, int, typing.Optional[str]]

    __slots__ = (
        "loc",
@@ -50,18 +53,17 @@ class ParseBaseException(Exception):
        msg: typing.Optional[str] = None,
        elem=None,
    ):
-        self.loc = loc
        if msg is None:
-            self.msg = pstr
-            self.pstr = ""
-        else:
-            self.msg = msg
-            self.pstr = pstr
+            msg, pstr = pstr, ""
+
+        self.loc = loc
+        self.msg = msg
+        self.pstr = pstr
        self.parser_element = elem
        self.args = (pstr, loc, msg)

    @staticmethod
-    def explain_exception(exc, depth=16):
+    def explain_exception(exc: Exception, depth: int = 16) -> str:
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.
@@ -82,17 +84,17 @@ class ParseBaseException(Exception):

        if depth is None:
            depth = sys.getrecursionlimit()
-        ret = []
+        ret: list[str] = []
        if isinstance(exc, ParseBaseException):
            ret.append(exc.line)
            ret.append(f"{' ' * (exc.column - 1)}^")
        ret.append(f"{type(exc).__name__}: {exc}")

-        if depth <= 0:
+        if depth <= 0 or exc.__traceback__ is None:
            return "\n".join(ret)

        callers = inspect.getinnerframes(exc.__traceback__, context=depth)
-        seen = set()
+        seen: set[int] = set()
        for ff in callers[-depth:]:
            frm = ff[0]

@@ -125,41 +127,58 @@ class ParseBaseException(Exception):
        return "\n".join(ret)

    @classmethod
-    def _from_exception(cls, pe):
+    def _from_exception(cls, pe) -> ParseBaseException:
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element)

-    @property
+    @cached_property
    def line(self) -> str:
        """
        Return the line of text where the exception occurred.
        """
        return line(self.loc, self.pstr)

-    @property
+    @cached_property
    def lineno(self) -> int:
        """
        Return the 1-based line number of text where the exception occurred.
        """
        return lineno(self.loc, self.pstr)

-    @property
+    @cached_property
    def col(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.
        """
        return col(self.loc, self.pstr)

-    @property
+    @cached_property
    def column(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.
        """
        return col(self.loc, self.pstr)

+    @cached_property
+    def found(self) -> str:
+        if not self.pstr:
+            return ""
+
+        if self.loc >= len(self.pstr):
+            return "end of text"
+
+        # pull out next word at error location
+        found_match = _exception_word_extractor.match(self.pstr, self.loc)
+        if found_match is not None:
+            found_text = found_match.group(0)
+        else:
+            found_text = self.pstr[self.loc : self.loc + 1]
+
+        return repr(found_text).replace(r"\\", "\\")
+
    # pre-PEP8 compatibility
    @property
    def parserElement(self):
@@ -169,21 +188,15 @@ class ParseBaseException(Exception):
    def parserElement(self, elem):
        self.parser_element = elem

+    def copy(self):
+        return copy.copy(self)
+
+    def formatted_message(self) -> str:
+        found_phrase = f", found {self.found}" if self.found else ""
+        return f"{self.msg}{found_phrase}  (at char {self.loc}), (line:{self.lineno}, col:{self.column})"
+
    def __str__(self) -> str:
-        if self.pstr:
-            if self.loc >= len(self.pstr):
-                foundstr = ", found end of text"
-            else:
-                # pull out next word at error location
-                found_match = _exception_word_extractor.match(self.pstr, self.loc)
-                if found_match is not None:
-                    found = found_match.group(0)
-                else:
-                    found = self.pstr[self.loc : self.loc + 1]
-                foundstr = (", found %r" % found).replace(r"\\", "\\")
-        else:
-            foundstr = ""
-        return f"{self.msg}{foundstr}  (at char {self.loc}), (line:{self.lineno}, col:{self.column})"
+        return self.formatted_message()

    def __repr__(self):
        return str(self)
@@ -199,12 +212,10 @@ class ParseBaseException(Exception):
        line_str = self.line
        line_column = self.column - 1
        if markerString:
-            line_str = "".join(
-                (line_str[:line_column], markerString, line_str[line_column:])
-            )
+            line_str = f"{line_str[:line_column]}{markerString}{line_str[line_column:]}"
        return line_str.strip()

-    def explain(self, depth=16) -> str:
+    def explain(self, depth: int = 16) -> str:
        """
        Method to translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.
@@ -292,6 +303,8 @@ class RecursiveGrammarException(Exception):
    Exception thrown by :class:`ParserElement.validate` if the
    grammar could be left-recursive; parser may need to enable
    left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
+
+    Deprecated: only used by deprecated method ParserElement.validate.
    """

    def __init__(self, parseElementList):
--- a/lib/pyparsing/helpers.py
+++ b/lib/pyparsing/helpers.py
@@ -1,5 +1,6 @@
 # helpers.py
 import html.entities
+import operator
 import re
 import sys
 import typing
@@ -10,6 +11,7 @@ from .util import (
    _bslash,
    _flatten,
    _escape_regex_range_chars,
+    make_compressed_re,
    replaced_by_pep8,
 )

@@ -203,15 +205,15 @@ def one_of(
        )

    if caseless:
-        isequal = lambda a, b: a.upper() == b.upper()
+        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
-        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
+        parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral
    else:
-        isequal = lambda a, b: a == b
+        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)
-        parseElementClass = Keyword if asKeyword else Literal
+        parse_element_class = Keyword if asKeyword else Literal

-    symbols: List[str] = []
+    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
@@ -224,20 +226,19 @@ def one_of(

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
-    if any(len(sym) > 1 for sym in symbols):
-        i = 0
-        while i < len(symbols) - 1:
-            cur = symbols[i]
-            for j, other in enumerate(symbols[i + 1 :]):
-                if isequal(other, cur):
-                    del symbols[i + j + 1]
-                    break
-                if masks(cur, other):
-                    del symbols[i + j + 1]
-                    symbols.insert(i, other)
-                    break
-            else:
-                i += 1
+    i = 0
+    while i < len(symbols) - 1:
+        cur = symbols[i]
+        for j, other in enumerate(symbols[i + 1 :]):
+            if is_equal(other, cur):
+                del symbols[i + j + 1]
+                break
+            if len(other) > len(cur) and masks(cur, other):
+                del symbols[i + j + 1]
+                symbols.insert(i, other)
+                break
+        else:
+            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0
@@ -269,7 +270,7 @@ def one_of(
            )

    # last resort, just use MatchFirst
-    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
+    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

@@ -602,7 +603,7 @@ def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">"))

 def make_html_tags(
    tag_str: Union[str, ParserElement]
-) -> Tuple[ParserElement, ParserElement]:
+) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.
@@ -629,7 +630,7 @@ def make_html_tags(

 def make_xml_tags(
    tag_str: Union[str, ParserElement]
-) -> Tuple[ParserElement, ParserElement]:
+) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

@@ -645,9 +646,12 @@ any_open_tag, any_close_tag = make_html_tags(
 )

 _htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
-common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
-    "common HTML entity"
+_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
+    " ", "|"
 )
+common_html_entity = Regex(
+    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
+).set_name("common HTML entity")


 def replace_html_entity(s, l, t):
@@ -664,16 +668,16 @@ class OpAssoc(Enum):


 InfixNotationOperatorArgType = Union[
-    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
+    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
 ]
 InfixNotationOperatorSpec = Union[
-    Tuple[
+    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
-    Tuple[
+    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
@@ -683,7 +687,7 @@ InfixNotationOperatorSpec = Union[

 def infix_notation(
    base_expr: ParserElement,
-    op_list: List[InfixNotationOperatorSpec],
+    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
 ) -> ParserElement:
@@ -1032,7 +1036,7 @@ python_style_comment = Regex(r"#.*").set_name("Python style comment")

 # build list of built-in expressions, for future reference if a global default value
 # gets updated
-_builtin_exprs: List[ParserElement] = [
+_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
 ]

--- a/lib/pyparsing/results.py
+++ b/lib/pyparsing/results.py
@@ -1,4 +1,7 @@
 # results.py
+from __future__ import annotations
+
+import collections
 from collections.abc import (
    MutableMapping,
    Mapping,
@@ -7,21 +10,21 @@ from collections.abc import (
    Iterable,
 )
 import pprint
-from typing import Tuple, Any, Dict, Set, List
+from typing import Any

 from .util import replaced_by_pep8


-str_type: Tuple[type, ...] = (str, bytes)
+str_type: tuple[type, ...] = (str, bytes)
 _generator_type = type((_ for _ in ()))


 class _ParseResultsWithOffset:
-    tup: Tuple["ParseResults", int]
+    tup: tuple[ParseResults, int]
    __slots__ = ["tup"]

-    def __init__(self, p1: "ParseResults", p2: int):
-        self.tup: Tuple[ParseResults, int] = (p1, p2)
+    def __init__(self, p1: ParseResults, p2: int):
+        self.tup: tuple[ParseResults, int] = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]
@@ -79,14 +82,14 @@ class ParseResults:
        - year: '1999'
    """

-    _null_values: Tuple[Any, ...] = (None, [], ())
+    _null_values: tuple[Any, ...] = (None, [], ())

    _name: str
-    _parent: "ParseResults"
-    _all_names: Set[str]
+    _parent: ParseResults
+    _all_names: set[str]
    _modal: bool
-    _toklist: List[Any]
-    _tokdict: Dict[str, Any]
+    _toklist: list[Any]
+    _tokdict: dict[str, Any]

    __slots__ = (
        "_name",
@@ -172,8 +175,8 @@ class ParseResults:
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
-    ):
-        self._tokdict: Dict[str, _ParseResultsWithOffset]
+    ) -> None:
+        self._tokdict: dict[str, _ParseResultsWithOffset]
        self._modal = modal

        if name is None or name == "":
@@ -226,7 +229,7 @@ class ParseResults:
            self._toklist[k] = v
            sub = v
        else:
-            self._tokdict[k] = self._tokdict.get(k, list()) + [
+            self._tokdict[k] = self._tokdict.get(k, []) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
@@ -443,12 +446,12 @@ class ParseResults:
                raise AttributeError(name)
            return ""

-    def __add__(self, other: "ParseResults") -> "ParseResults":
+    def __add__(self, other: ParseResults) -> ParseResults:
        ret = self.copy()
        ret += other
        return ret

-    def __iadd__(self, other: "ParseResults") -> "ParseResults":
+    def __iadd__(self, other: ParseResults) -> ParseResults:
        if not other:
            return self

@@ -470,7 +473,7 @@ class ParseResults:
        self._all_names |= other._all_names
        return self

-    def __radd__(self, other) -> "ParseResults":
+    def __radd__(self, other) -> ParseResults:
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
@@ -504,9 +507,10 @@ class ParseResults:
                out.append(str(item))
        return out

-    def as_list(self) -> list:
+    def as_list(self, *, flatten: bool = False) -> list:
        """
        Returns the parse results as a nested list of matching tokens, all converted to strings.
+        If flatten is True, all the nesting levels in the returned list are collapsed.

        Example::

@@ -519,10 +523,22 @@ class ParseResults:
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
-        return [
-            res.as_list() if isinstance(res, ParseResults) else res
-            for res in self._toklist
-        ]
+        def flattened(pr):
+            to_visit = collections.deque([*self])
+            while to_visit:
+                to_do = to_visit.popleft()
+                if isinstance(to_do, ParseResults):
+                    to_visit.extendleft(to_do[::-1])
+                else:
+                    yield to_do
+
+        if flatten:
+            return [*flattened(self)]
+        else:
+            return [
+                res.as_list() if isinstance(res, ParseResults) else res
+                for res in self._toklist
+            ]

    def as_dict(self) -> dict:
        """
@@ -553,7 +569,7 @@ class ParseResults:

        return dict((k, to_item(v)) for k, v in self.items())

-    def copy(self) -> "ParseResults":
+    def copy(self) -> ParseResults:
        """
        Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
        items contained within the source are shared with the copy. Use
@@ -567,7 +583,7 @@ class ParseResults:
        ret._name = self._name
        return ret

-    def deepcopy(self) -> "ParseResults":
+    def deepcopy(self) -> ParseResults:
        """
        Returns a new deep copy of a :class:`ParseResults` object.
        """
@@ -584,11 +600,11 @@ class ParseResults:
                    dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
            elif isinstance(obj, Iterable):
                ret._toklist[i] = type(obj)(
-                    v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
+                    v.deepcopy() if isinstance(v, ParseResults) else v for v in obj  # type: ignore[call-arg]
                )
        return ret

-    def get_name(self) -> str:
+    def get_name(self) -> str | None:
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.
@@ -616,7 +632,7 @@ class ParseResults:
        if self._name:
            return self._name
        elif self._parent:
-            par: "ParseResults" = self._parent
+            par: ParseResults = self._parent
            parent_tokdict_items = par._tokdict.items()
            return next(
                (
@@ -761,7 +777,7 @@ class ParseResults:
        return dir(type(self)) + list(self.keys())

    @classmethod
-    def from_dict(cls, other, name=None) -> "ParseResults":
+    def from_dict(cls, other, name=None) -> ParseResults:
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
--- a/lib/pyparsing/testing.py
+++ b/lib/pyparsing/testing.py
@@ -257,10 +257,14 @@ class pyparsing_test:
        eol_mark: str = "|",
        mark_spaces: typing.Optional[str] = None,
        mark_control: typing.Optional[str] = None,
+        *,
+        indent: typing.Union[str, int] = "",
+        base_1: bool = True,
    ) -> str:
        """
        Helpful method for debugging a parser - prints a string with line and column numbers.
-        (Line and column numbers are 1-based.)
+        (Line and column numbers are 1-based by default - if debugging a parse action,
+        pass base_1=False, to correspond to the loc value passed to the parse action.)

        :param s: tuple(bool, str - string to be printed with line and column numbers
        :param start_line: int - (optional) starting line number in s to print (default=1)
@@ -273,11 +277,18 @@ class pyparsing_test:
                                 - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
                                 - any single character string - replace control characters with given string
                                 - None (default) - string is displayed as-is
+        :param indent: str | int - (optional) string to indent with line and column numbers; if an int
+                                   is passed, converted to " " * indent
+        :param base_1: bool - (optional) whether to label string using base 1; if False, string will be
+                              labeled based at 0 (default=True)

        :return: str - input string with leading line numbers and column number headers
        """
        if expand_tabs:
            s = s.expandtabs()
+        if isinstance(indent, int):
+            indent = " " * indent
+        indent = indent.expandtabs()
        if mark_control is not None:
            mark_control = typing.cast(str, mark_control)
            if mark_control == "unicode":
@@ -300,46 +311,52 @@ class pyparsing_test:
            else:
                s = s.replace(" ", mark_spaces)
        if start_line is None:
-            start_line = 1
+            start_line = 0
        if end_line is None:
            end_line = len(s)
        end_line = min(end_line, len(s))
-        start_line = min(max(1, start_line), end_line)
+        start_line = min(max(0, start_line), end_line)

        if mark_control != "unicode":
-            s_lines = s.splitlines()[start_line - 1 : end_line]
+            s_lines = s.splitlines()[start_line - base_1 : end_line]
        else:
-            s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]]
+            s_lines = [
+                line + "␊" for line in s.split("␊")[start_line - base_1 : end_line]
+            ]
        if not s_lines:
            return ""

        lineno_width = len(str(end_line))
        max_line_len = max(len(line) for line in s_lines)
-        lead = " " * (lineno_width + 1)
+        lead = indent + " " * (lineno_width + 1)
        if max_line_len >= 99:
            header0 = (
                lead
+                + ("" if base_1 else " ")
                + "".join(
                    f"{' ' * 99}{(i + 1) % 100}"
-                    for i in range(max(max_line_len // 100, 1))
+                    for i in range(1 if base_1 else 0, max(max_line_len // 100, 1))
                )
                + "\n"
            )
        else:
            header0 = ""
        header1 = (
-            header0
+            ("" if base_1 else " ")
            + lead
            + "".join(f"         {(i + 1) % 10}" for i in range(-(-max_line_len // 10)))
            + "\n"
        )
-        header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
+        digits = "1234567890"
+        header2 = (
+            lead + ("" if base_1 else "0") + digits * (-(-max_line_len // 10)) + "\n"
+        )
        return (
            header1
            + header2
            + "\n".join(
-                f"{i:{lineno_width}d}:{line}{eol_mark}"
-                for i, line in enumerate(s_lines, start=start_line)
+                f"{indent}{i:{lineno_width}d}:{line}{eol_mark}"
+                for i, line in enumerate(s_lines, start=start_line + base_1)
            )
            + "\n"
        )
--- a/lib/pyparsing/unicode.py
+++ b/lib/pyparsing/unicode.py
@ -2,7 +2,7 @@

 import sys
 from itertools import filterfalse
-from typing import List, Tuple, Union
+from typing import Union


 class _lazyclassproperty:
@ -25,7 +25,7 @@ class _lazyclassproperty:
        return cls._intern[attrname]


-UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
+UnicodeRangeList = list[Union[tuple[int, int], tuple[int]]]


 class unicode_set:
@ -53,9 +53,9 @@ class unicode_set:
    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
-    def _chars_for_ranges(cls) -> List[str]:
-        ret: List[int] = []
-        for cc in cls.__mro__:
+    def _chars_for_ranges(cls) -> list[str]:
+        ret: list[int] = []
+        for cc in cls.__mro__:  # type: ignore[attr-defined]
            if cc is unicode_set:
                break
            for rr in getattr(cc, "_ranges", ()):
--- a/lib/pyparsing/util.py
+++ b/lib/pyparsing/util.py
@ -1,11 +1,11 @@
 # util.py
-import inspect
-import warnings
-import types
-import collections
-import itertools
+import contextlib
 from functools import lru_cache, wraps
-from typing import Callable, List, Union, Iterable, TypeVar, cast
+import inspect
+import itertools
+import types
+from typing import Callable, Union, Iterable, TypeVar, cast
+import warnings

 _bslash = chr(92)
 C = TypeVar("C", bound=Callable)
@ -14,8 +14,8 @@ C = TypeVar("C", bound=Callable)
 class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

-    _all_names: List[str] = []
-    _fixed_names: List[str] = []
+    _all_names: list[str] = []
+    _fixed_names: list[str] = []
    _type_desc = "configuration"

    @classmethod
@ -100,27 +100,24 @@ class _UnboundedCache:

 class _FifoCache:
    def __init__(self, size):
-        self.not_in_cache = not_in_cache = object()
        cache = {}
-        keyring = [object()] * size
+        self.size = size
+        self.not_in_cache = not_in_cache = object()
        cache_get = cache.get
        cache_pop = cache.pop
-        keyiter = itertools.cycle(range(size))

        def get(_, key):
            return cache_get(key, not_in_cache)

        def set_(_, key, value):
            cache[key] = value
-            i = next(keyiter)
-            cache_pop(keyring[i], None)
-            keyring[i] = key
+            while len(cache) > size:
+                # pop oldest element in cache by getting the first key
+                cache_pop(next(iter(cache)))

        def clear(_):
            cache.clear()
-            keyring[:] = [object()] * size

-        self.size = size
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
@ -137,13 +134,13 @@ class LRUMemo:
    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
-        self._memory = collections.OrderedDict()
+        self._memory = {}

    def __getitem__(self, key):
        try:
            return self._active[key]
        except KeyError:
-            self._memory.move_to_end(key)
+            self._memory[key] = self._memory.pop(key)
            return self._memory[key]

    def __setitem__(self, key, value):
@ -156,8 +153,9 @@ class LRUMemo:
        except KeyError:
            pass
        else:
-            while len(self._memory) >= self._capacity:
-                self._memory.popitem(last=False)
+            oldest_keys = list(self._memory)[: -(self._capacity + 1)]
+            for key_to_delete in oldest_keys:
+                self._memory.pop(key_to_delete)
            self._memory[key] = value

    def clear(self):
@ -183,60 +181,182 @@ def _escape_regex_range_chars(s: str) -> str:
    return str(s)


+class _GroupConsecutive:
+    """
+    Used as a callable `key` for itertools.groupby to group
+    characters that are consecutive:
+        itertools.groupby("abcdejkmpqrs", key=_GroupConsecutive())
+        yields:
+            (0, iter(['a', 'b', 'c', 'd', 'e']))
+            (1, iter(['j', 'k']))
+            (2, iter(['m']))
+            (3, iter(['p', 'q', 'r', 's']))
+    """
+    def __init__(self):
+        self.prev = 0
+        self.counter = itertools.count()
+        self.value = -1
+
+    def __call__(self, char: str) -> int:
+        c_int = ord(char)
+        self.prev, prev = c_int, self.prev
+        if c_int - prev > 1:
+            self.value = next(self.counter)
+        return self.value
+
+
 def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
 ) -> str:
-    def is_consecutive(c):
-        c_int = ord(c)
-        is_consecutive.prev, prev = c_int, is_consecutive.prev
-        if c_int - prev > 1:
-            is_consecutive.value = next(is_consecutive.counter)
-        return is_consecutive.value
+    r"""
+    Take a string or list of single-character strings, and return
+    a string of the consecutive characters in that string collapsed
+    into groups, as might be used in a regular expression '[a-z]'
+    character set:
+        'a' -> 'a' -> '[a]'
+        'bc' -> 'bc' -> '[bc]'
+        'defgh' -> 'd-h' -> '[d-h]'
+        'fdgeh' -> 'd-h' -> '[d-h]'
+        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'
+    Duplicates get collapsed out:
+        'aaa' -> 'a' -> '[a]'
+        'bcbccb' -> 'bc' -> '[bc]'
+        'defghhgf' -> 'd-h' -> '[d-h]'
+        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'
+    Spaces are preserved:
+        'ab c' -> ' a-c' -> '[ a-c]'
+    Characters that are significant when defining regex ranges
+    get escaped:
+        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
+    """

-    is_consecutive.prev = 0  # type: ignore [attr-defined]
-    is_consecutive.counter = itertools.count()  # type: ignore [attr-defined]
-    is_consecutive.value = -1  # type: ignore [attr-defined]
+    # Developer notes:
+    # - Do not optimize this code assuming that the given input string
+    #   or internal lists will be short (such as in loading generators into
+    #   lists to make it easier to find the last element); this method is also
+    #   used to generate regex ranges for character sets in the pyparsing.unicode
+    #   classes, and these can be _very_ long lists of strings

-    def escape_re_range_char(c):
+    def escape_re_range_char(c: str) -> str:
        return "\\" + c if c in r"\^-][" else c

-    def no_escape_re_range_char(c):
+    def no_escape_re_range_char(c: str) -> str:
        return c

    if not re_escape:
        escape_re_range_char = no_escape_re_range_char

    ret = []
-    s = "".join(sorted(set(s)))
-    if len(s) > 3:
-        for _, chars in itertools.groupby(s, key=is_consecutive):
+
+    # reduce input string to remove duplicates, and put in sorted order
+    s_chars: list[str] = sorted(set(s))
+
+    if len(s_chars) > 2:
+        # find groups of characters that are consecutive (can be collapsed
+        # down to "<first>-<last>")
+        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
+            # _ is unimportant, is just used to identify groups
+            # chars is an iterator of one or more consecutive characters
+            # that comprise the current group
            first = last = next(chars)
-            last = collections.deque(
-                itertools.chain(iter([last]), chars), maxlen=1
-            ).pop()
+            with contextlib.suppress(ValueError):
+                *_, last = chars
+
            if first == last:
+                # there was only a single char in this group
                ret.append(escape_re_range_char(first))
+
+            elif last == chr(ord(first) + 1):
+                # there were only 2 characters in this group
+                #   'a','b' -> 'ab'
+                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")
+
            else:
-                sep = "" if ord(last) == ord(first) + 1 else "-"
+                # there were > 2 characters in this group, make into a range
+                #   'c','d','e' -> 'c-e'
                ret.append(
-                    f"{escape_re_range_char(first)}{sep}{escape_re_range_char(last)}"
+                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
-        ret = [escape_re_range_char(c) for c in s]
+        # 2 or fewer distinct chars were given (possibly none) to form into groups
+        #   'a' -> ['a']
+        #   'bc' -> ['b', 'c']
+        #   'dg' -> ['d', 'g']
+        # no need to list them with "-", just return as a list
+        # (after escaping)
+        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)


-def _flatten(ll: list) -> list:
+def _flatten(ll: Iterable) -> list:
    ret = []
-    for i in ll:
-        if isinstance(i, list):
-            ret.extend(_flatten(i))
+    to_visit = [*ll]
+    while to_visit:
+        i = to_visit.pop(0)
+        if isinstance(i, Iterable) and not isinstance(i, str):
+            to_visit[:0] = i
        else:
            ret.append(i)
    return ret


+def make_compressed_re(
+    word_list: Iterable[str], max_level: int = 2, _level: int = 1
+) -> str:
+    """
+    Create a regular expression string from a list of words, collapsing by common
+    prefixes and optional suffixes.
+
+    Calls itself recursively to build nested sublists for each group of suffixes
+    that have a shared prefix.
+    """
+
+    def get_suffixes_from_common_prefixes(namelist: list[str]):
+        if len(namelist) > 1:
+            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
+                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
+        else:
+            yield namelist[0][0], [namelist[0][1:]]
+
+    if max_level == 0:
+        return "|".join(sorted(word_list, key=len, reverse=True))
+
+    ret = []
+    sep = ""
+    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
+        ret.append(sep)
+        sep = "|"
+
+        trailing = ""
+        if "" in suffixes:
+            trailing = "?"
+            suffixes.remove("")
+
+        if len(suffixes) > 1:
+            if all(len(s) == 1 for s in suffixes):
+                ret.append(f"{initial}[{''.join(suffixes)}]{trailing}")
+            else:
+                if _level < max_level:
+                    suffix_re = make_compressed_re(
+                        sorted(suffixes), max_level, _level + 1
+                    )
+                    ret.append(f"{initial}({suffix_re}){trailing}")
+                else:
+                    suffixes.sort(key=len, reverse=True)
+                    ret.append(f"{initial}({'|'.join(suffixes)}){trailing}")
+        else:
+            if suffixes:
+                suffix = suffixes[0]
+                if len(suffix) > 1 and trailing:
+                    ret.append(f"{initial}({suffix}){trailing}")
+                else:
+                    ret.append(f"{initial}{suffix}{trailing}")
+            else:
+                ret.append(initial)
+    return "".join(ret)
+
+
 def replaced_by_pep8(compat_name: str, fn: C) -> C:
    # In a future version, uncomment the code in the internal _inner() functions
    # to begin emitting DeprecationWarnings.
@ -268,10 +388,10 @@ def replaced_by_pep8(compat_name: str, fn: C) -> C:
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
-        _inner.__kwdefaults__ = fn.__kwdefaults__
+        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
-        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__
+        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
-        _inner.__kwdefaults__ = None
+        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
--- a/requirements.txt
+++ b/requirements.txt
@ -29,7 +29,7 @@ plexapi==4.15.16
 portend==3.2.0
 profilehooks==1.13.0
 PyJWT==2.9.0
-pyparsing==3.1.4
+pyparsing==3.2.0
 python-dateutil==2.9.0.post0
 python-twitter==3.5
 pytz==2024.2