diff --git a/lib/pyparsing/__init__.py b/lib/pyparsing/__init__.py index 7802ff15..3dbc3cf8 100644 --- a/lib/pyparsing/__init__.py +++ b/lib/pyparsing/__init__.py @@ -56,7 +56,7 @@ self-explanatory class names, and the use of :class:`'+'`, :class:`'|'`, :class:`'^'` and :class:`'&'` operators. The :class:`ParseResults` object returned from -:class:`ParserElement.parseString` can be +:class:`ParserElement.parse_string` can be accessed as a nested list, a dictionary, or an object with named attributes. @@ -85,11 +85,11 @@ classes inherit from. Use the docstrings for examples of how to: and :class:`'&'` operators to combine simple expressions into more complex ones - associate names with your parsed results using - :class:`ParserElement.setResultsName` + :class:`ParserElement.set_results_name` - access the parsed data, which is returned as a :class:`ParseResults` object - - find some helpful expression short-cuts like :class:`delimitedList` - and :class:`oneOf` + - find some helpful expression short-cuts like :class:`DelimitedList` + and :class:`one_of` - find more useful common expressions in the :class:`pyparsing_common` namespace class """ @@ -106,30 +106,22 @@ class version_info(NamedTuple): @property def __version__(self): return ( - "{}.{}.{}".format(self.major, self.minor, self.micro) + f"{self.major}.{self.minor}.{self.micro}" + ( - "{}{}{}".format( - "r" if self.releaselevel[0] == "c" else "", - self.releaselevel[0], - self.serial, - ), + f"{'r' if self.releaselevel[0] == 'c' else ''}{self.releaselevel[0]}{self.serial}", "", )[self.releaselevel == "final"] ) def __str__(self): - return "{} {} / {}".format(__name__, self.__version__, __version_time__) + return f"{__name__} {self.__version__} / {__version_time__}" def __repr__(self): - return "{}.{}({})".format( - __name__, - type(self).__name__, - ", ".join("{}={!r}".format(*nv) for nv in zip(self._fields, self)), - ) + return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 0, 9, "final", 0) -__version_time__ = "05 May 2022 07:02 UTC" +__version_info__ = version_info(3, 1, 1, "final", 1) +__version_time__ = "29 Jul 2023 22:27 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " @@ -139,9 +131,9 @@ from .exceptions import * from .actions import * from .core import __diag__, __compat__ from .results import * -from .core import * +from .core import * # type: ignore[misc, assignment] from .core import _builtin_exprs as core_builtin_exprs -from .helpers import * +from .helpers import * # type: ignore[misc, assignment] from .helpers import _builtin_exprs as helper_builtin_exprs from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode @@ -153,11 +145,11 @@ from .common import ( # define backward compat synonyms if "pyparsing_unicode" not in globals(): - pyparsing_unicode = unicode + pyparsing_unicode = unicode # type: ignore[misc] if "pyparsing_common" not in globals(): - pyparsing_common = common + pyparsing_common = common # type: ignore[misc] if "pyparsing_test" not in globals(): - pyparsing_test = testing + pyparsing_test = testing # type: ignore[misc] core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs @@ -174,7 +166,9 @@ __all__ = [ "CaselessKeyword", "CaselessLiteral", "CharsNotIn", + "CloseMatch", "Combine", + "DelimitedList", "Dict", "Each", "Empty", @@ -227,9 +221,11 @@ __all__ = [ "alphas8bit", "any_close_tag", "any_open_tag", + "autoname_elements", "c_style_comment", "col", "common_html_entity", + "condition_as_parse_action", "counted_array", "cpp_style_comment", "dbl_quoted_string", @@ -241,6 +237,7 @@ __all__ = [ "html_comment", "identchars", "identbodychars", + "infix_notation", "java_style_comment", "line", "line_end", @@ -255,8 +252,12 @@ __all__ = [ "null_debug_action", "nums", "one_of", + "original_text_for", "printables", "punc8bit", + "pyparsing_common", + "pyparsing_test", + "pyparsing_unicode", "python_style_comment", "quoted_string", "remove_quotes", @@ -267,28 +268,20 @@ __all__ = [ "srange", "string_end", "string_start", + "token_map", "trace_parse_action", + "ungroup", + "unicode_set", "unicode_string", "with_attribute", - "indentedBlock", - "original_text_for", - "ungroup", - "infix_notation", - "locatedExpr", "with_class", - "CloseMatch", - "token_map", - "pyparsing_common", - "pyparsing_unicode", - "unicode_set", - "condition_as_parse_action", - "pyparsing_test", # pre-PEP8 compatibility names "__versionTime__", "anyCloseTag", "anyOpenTag", "cStyleComment", "commonHTMLEntity", + "conditionAsParseAction", "countedArray", "cppStyleComment", "dblQuotedString", @@ -296,9 +289,12 @@ __all__ = [ "delimitedList", "dictOf", "htmlComment", + "indentedBlock", + "infixNotation", "javaStyleComment", "lineEnd", "lineStart", + "locatedExpr", "makeHTMLTags", "makeXMLTags", "matchOnlyAtCol", @@ -308,6 +304,7 @@ __all__ = [ "nullDebugAction", "oneOf", "opAssoc", + "originalTextFor", "pythonStyleComment", "quotedString", "removeQuotes", @@ -317,15 +314,12 @@ __all__ = [ "sglQuotedString", "stringEnd", "stringStart", + "tokenMap", "traceParseAction", "unicodeString", "withAttribute", - "indentedBlock", - "originalTextFor", - "infixNotation", - "locatedExpr", "withClass", - "tokenMap", - "conditionAsParseAction", - "autoname_elements", + "common", + "unicode", + "testing", ] diff --git a/lib/pyparsing/actions.py b/lib/pyparsing/actions.py index f72c66e7..ca6e4c6a 100644 --- a/lib/pyparsing/actions.py +++ b/lib/pyparsing/actions.py @@ -1,7 +1,7 @@ # actions.py from .exceptions import ParseException -from .util import col +from .util import col, replaced_by_pep8 class OnlyOnce: @@ -38,7 +38,7 @@ def match_only_at_col(n): def verify_col(strg, locn, toks): if col(locn, strg) != n: - raise ParseException(strg, locn, "matched token not at column {}".format(n)) + raise ParseException(strg, locn, f"matched token not at column {n}") return verify_col @@ -148,15 +148,13 @@ def with_attribute(*args, **attr_dict): raise ParseException( s, l, - "attribute {!r} has value {!r}, must be {!r}".format( - attrName, tokens[attrName], attrValue - ), + f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}", ) return pa -with_attribute.ANY_VALUE = object() +with_attribute.ANY_VALUE = object() # type: ignore [attr-defined] def with_class(classname, namespace=""): @@ -195,13 +193,25 @@ def with_class(classname, namespace=""): 1 4 0 1 0 1,3 2,3 1,1 """ - classattr = "{}:class".format(namespace) if namespace else "class" + classattr = f"{namespace}:class" if namespace else "class" return with_attribute(**{classattr: classname}) # pre-PEP8 compatibility symbols -replaceWith = replace_with -removeQuotes = remove_quotes -withAttribute = with_attribute -withClass = with_class -matchOnlyAtCol = match_only_at_col +# fmt: off +@replaced_by_pep8(replace_with) +def replaceWith(): ... + +@replaced_by_pep8(remove_quotes) +def removeQuotes(): ... + +@replaced_by_pep8(with_attribute) +def withAttribute(): ... + +@replaced_by_pep8(with_class) +def withClass(): ... + +@replaced_by_pep8(match_only_at_col) +def matchOnlyAtCol(): ... + +# fmt: on diff --git a/lib/pyparsing/common.py b/lib/pyparsing/common.py index 1859fb79..7a666b27 100644 --- a/lib/pyparsing/common.py +++ b/lib/pyparsing/common.py @@ -1,6 +1,6 @@ # common.py from .core import * -from .helpers import delimited_list, any_open_tag, any_close_tag +from .helpers import DelimitedList, any_open_tag, any_close_tag from datetime import datetime @@ -22,17 +22,17 @@ class pyparsing_common: Parse actions: - - :class:`convertToInteger` - - :class:`convertToFloat` - - :class:`convertToDate` - - :class:`convertToDatetime` - - :class:`stripHTMLTags` - - :class:`upcaseTokens` - - :class:`downcaseTokens` + - :class:`convert_to_integer` + - :class:`convert_to_float` + - :class:`convert_to_date` + - :class:`convert_to_datetime` + - :class:`strip_html_tags` + - :class:`upcase_tokens` + - :class:`downcase_tokens` Example:: - pyparsing_common.number.runTests(''' + pyparsing_common.number.run_tests(''' # any int or real number, returned as the appropriate type 100 -100 @@ -42,7 +42,7 @@ class pyparsing_common: 1e-12 ''') - pyparsing_common.fnumber.runTests(''' + pyparsing_common.fnumber.run_tests(''' # any int or real number, returned as float 100 -100 @@ -52,19 +52,19 @@ class pyparsing_common: 1e-12 ''') - pyparsing_common.hex_integer.runTests(''' + pyparsing_common.hex_integer.run_tests(''' # hex numbers 100 FF ''') - pyparsing_common.fraction.runTests(''' + pyparsing_common.fraction.run_tests(''' # fractions 1/2 -3/4 ''') - pyparsing_common.mixed_integer.runTests(''' + pyparsing_common.mixed_integer.run_tests(''' # mixed fractions 1 1/2 @@ -73,8 +73,8 @@ class pyparsing_common: ''') import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' + pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID)) + pyparsing_common.uuid.run_tests(''' # uuid 12345678-1234-5678-1234-567812345678 ''') @@ -260,8 +260,8 @@ class pyparsing_common: Example:: date_expr = pyparsing_common.iso8601_date.copy() - date_expr.setParseAction(pyparsing_common.convertToDate()) - print(date_expr.parseString("1999-12-31")) + date_expr.set_parse_action(pyparsing_common.convert_to_date()) + print(date_expr.parse_string("1999-12-31")) prints:: @@ -287,8 +287,8 @@ class pyparsing_common: Example:: dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.setParseAction(pyparsing_common.convertToDatetime()) - print(dt_expr.parseString("1999-12-31T23:59:59.999")) + dt_expr.set_parse_action(pyparsing_common.convert_to_datetime()) + print(dt_expr.parse_string("1999-12-31T23:59:59.999")) prints:: @@ -326,9 +326,9 @@ class pyparsing_common: # strip HTML links from normal text text = 'More info at the pyparsing wiki page' - td, td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - print(table_text.parseString(text).body) + td, td_end = make_html_tags("TD") + table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end + print(table_text.parse_string(text).body) Prints:: @@ -348,7 +348,7 @@ class pyparsing_common: .streamline() .set_name("commaItem") ) - comma_separated_list = delimited_list( + comma_separated_list = DelimitedList( Opt(quoted_string.copy() | _commasepitem, default="") ).set_name("comma separated list") """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" @@ -363,7 +363,7 @@ class pyparsing_common: url = Regex( # https://mathiasbynens.be/demo/url-regex # https://gist.github.com/dperini/729294 - r"^" + + r"(?P" + # protocol identifier (optional) # short syntax // still required r"(?:(?:(?Phttps?|ftp):)?\/\/)" + @@ -405,18 +405,26 @@ class pyparsing_common: r"(\?(?P[^#]*))?" + # fragment (optional) r"(#(?P\S*))?" + - r"$" + r")" ).set_name("url") + """URL (http/https/ftp scheme)""" # fmt: on # pre-PEP8 compatibility names convertToInteger = convert_to_integer + """Deprecated - use :class:`convert_to_integer`""" convertToFloat = convert_to_float + """Deprecated - use :class:`convert_to_float`""" convertToDate = convert_to_date + """Deprecated - use :class:`convert_to_date`""" convertToDatetime = convert_to_datetime + """Deprecated - use :class:`convert_to_datetime`""" stripHTMLTags = strip_html_tags + """Deprecated - use :class:`strip_html_tags`""" upcaseTokens = upcase_tokens + """Deprecated - use :class:`upcase_tokens`""" downcaseTokens = downcase_tokens + """Deprecated - use :class:`downcase_tokens`""" _builtin_exprs = [ diff --git a/lib/pyparsing/core.py b/lib/pyparsing/core.py index 9acba3f3..73514ed0 100644 --- a/lib/pyparsing/core.py +++ b/lib/pyparsing/core.py @@ -1,19 +1,22 @@ # # core.py # + +from collections import deque import os import typing from typing import ( - NamedTuple, - Union, - Callable, Any, + Callable, Generator, - Tuple, List, - TextIO, - Set, + NamedTuple, Sequence, + Set, + TextIO, + Tuple, + Union, + cast, ) from abc import ABC, abstractmethod from enum import Enum @@ -40,6 +43,7 @@ from .util import ( _flatten, LRUMemo as _LRUMemo, UnboundedMemo as _UnboundedMemo, + replaced_by_pep8, ) from .exceptions import * from .actions import * @@ -134,6 +138,7 @@ class __diag__(__config_flags): class Diagnostics(Enum): """ Diagnostic configuration (all default to disabled) + - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results @@ -228,6 +233,8 @@ _single_arg_builtins = { } _generatorType = types.GeneratorType +ParseImplReturnType = Tuple[int, Any] +PostParseReturnType = Union[ParseResults, Sequence[ParseResults]] ParseAction = Union[ Callable[[], Any], Callable[[ParseResults], Any], @@ -256,7 +263,7 @@ hexnums = nums + "ABCDEFabcdef" alphanums = alphas + nums printables = "".join([c for c in string.printable if c not in string.whitespace]) -_trim_arity_call_line: traceback.StackSummary = None +_trim_arity_call_line: traceback.StackSummary = None # type: ignore[assignment] def _trim_arity(func, max_limit=3): @@ -269,11 +276,6 @@ def _trim_arity(func, max_limit=3): limit = 0 found_arity = False - def extract_tb(tb, limit=0): - frames = traceback.extract_tb(tb, limit=limit) - frame_summary = frames[-1] - return [frame_summary[:2]] - # synthesize what would be returned by traceback.extract_stack at the call to # user's parse action 'func', so that we don't incur call penalty at parse time @@ -297,8 +299,10 @@ def _trim_arity(func, max_limit=3): raise else: tb = te.__traceback__ + frames = traceback.extract_tb(tb, limit=2) + frame_summary = frames[-1] trim_arity_type_error = ( - extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth + [frame_summary[:2]][-1][:2] == pa_call_line_synth ) del tb @@ -320,7 +324,7 @@ def _trim_arity(func, max_limit=3): def condition_as_parse_action( - fn: ParseCondition, message: str = None, fatal: bool = False + fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False ) -> ParseAction: """ Function to convert a simple predicate function that returns ``True`` or ``False`` @@ -353,15 +357,9 @@ def _default_start_debug_action( cache_hit_str = "*" if cache_hit else "" print( ( - "{}Match {} at loc {}({},{})\n {}\n {}^".format( - cache_hit_str, - expr, - loc, - lineno(loc, instring), - col(loc, instring), - line(loc, instring), - " " * (col(loc, instring) - 1), - ) + f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n" + f" {line(loc, instring)}\n" + f" {' ' * (col(loc, instring) - 1)}^" ) ) @@ -375,7 +373,7 @@ def _default_success_debug_action( cache_hit: bool = False, ): cache_hit_str = "*" if cache_hit else "" - print("{}Matched {} -> {}".format(cache_hit_str, expr, toks.as_list())) + print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}") def _default_exception_debug_action( @@ -386,11 +384,7 @@ def _default_exception_debug_action( cache_hit: bool = False, ): cache_hit_str = "*" if cache_hit else "" - print( - "{}Match {} failed, {} raised: {}".format( - cache_hit_str, expr, type(exc).__name__, exc - ) - ) + print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") def null_debug_action(*args): @@ -402,7 +396,7 @@ class ParserElement(ABC): DEFAULT_WHITE_CHARS: str = " \n\t\r" verbose_stacktrace: bool = False - _literalStringClass: typing.Optional[type] = None + _literalStringClass: type = None # type: ignore[assignment] @staticmethod def set_default_whitespace_chars(chars: str) -> None: @@ -447,6 +441,18 @@ class ParserElement(ABC): """ ParserElement._literalStringClass = cls + @classmethod + def using_each(cls, seq, **class_kwargs): + """ + Yields a sequence of class(obj, **class_kwargs) for obj in seq. + + Example:: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") + + """ + yield from (cls(obj, **class_kwargs) for obj in seq) + class DebugActions(NamedTuple): debug_try: typing.Optional[DebugStartAction] debug_match: typing.Optional[DebugSuccessAction] @@ -455,9 +461,9 @@ class ParserElement(ABC): def __init__(self, savelist: bool = False): self.parseAction: List[ParseAction] = list() self.failAction: typing.Optional[ParseFailAction] = None - self.customName = None - self._defaultName = None - self.resultsName = None + self.customName: str = None # type: ignore[assignment] + self._defaultName: typing.Optional[str] = None + self.resultsName: str = None # type: ignore[assignment] self.saveAsList = savelist self.skipWhitespace = True self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) @@ -490,12 +496,29 @@ class ParserElement(ABC): base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) # statement would normally raise a warning, but is now suppressed - print(base.parseString("x")) + print(base.parse_string("x")) """ self.suppress_warnings_.append(warning_type) return self + def visit_all(self): + """General-purpose method to yield all expressions and sub-expressions + in a grammar. Typically just for internal use. + """ + to_visit = deque([self]) + seen = set() + while to_visit: + cur = to_visit.popleft() + + # guard against looping forever through recursive grammars + if cur in seen: + continue + seen.add(cur) + + to_visit.extend(cur.recurse()) + yield cur + def copy(self) -> "ParserElement": """ Make a copy of this :class:`ParserElement`. Useful for defining @@ -585,11 +608,11 @@ class ParserElement(ABC): pdb.set_trace() return _parseMethod(instring, loc, doActions, callPreParse) - breaker._originalParseMethod = _parseMethod - self._parse = breaker + breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] + self._parse = breaker # type: ignore [assignment] else: if hasattr(self._parse, "_originalParseMethod"): - self._parse = self._parse._originalParseMethod + self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment] return self def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": @@ -601,9 +624,9 @@ class ParserElement(ABC): Each parse action ``fn`` is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object + - ``s`` = the original string being parsed (see note below) + - ``loc`` = the location of the matching substring + - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object The parsed tokens are passed to the parse action as ParseResults. They can be modified in place using list-style append, extend, and pop operations to update @@ -621,7 +644,7 @@ class ParserElement(ABC): Optional keyword arguments: - - call_during_try = (default= ``False``) indicate if parse action should be run during + - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing. For parse actions that have side effects, it is important to only call the parse action once it is determined that it is being called as part of a successful parse. For parse actions that perform additional @@ -697,10 +720,10 @@ class ParserElement(ABC): Optional keyword arguments: - - message = define a custom message to be used in the raised exception - - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise + - ``message`` = define a custom message to be used in the raised exception + - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException - - call_during_try = boolean to indicate if this method should be called during internal tryParse calls, + - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, default=False Example:: @@ -716,7 +739,9 @@ class ParserElement(ABC): for fn in fns: self.parseAction.append( condition_as_parse_action( - fn, message=kwargs.get("message"), fatal=kwargs.get("fatal", False) + fn, + message=str(kwargs.get("message")), + fatal=bool(kwargs.get("fatal", False)), ) ) @@ -731,30 +756,38 @@ class ParserElement(ABC): Fail acton fn is a callable function that takes the arguments ``fn(s, loc, expr, err)`` where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown + - ``s`` = string being parsed + - ``loc`` = location where expression match was attempted and failed + - ``expr`` = the parse expression that failed + - ``err`` = the exception thrown The function returns no value. It may throw :class:`ParseFatalException` if it is desired to stop parsing immediately.""" self.failAction = fn return self - def _skipIgnorables(self, instring, loc): + def _skipIgnorables(self, instring: str, loc: int) -> int: + if not self.ignoreExprs: + return loc exprsFound = True + ignore_expr_fns = [e._parse for e in self.ignoreExprs] + last_loc = loc while exprsFound: exprsFound = False - for e in self.ignoreExprs: + for ignore_fn in ignore_expr_fns: try: while 1: - loc, dummy = e._parse(instring, loc) + loc, dummy = ignore_fn(instring, loc) exprsFound = True except ParseException: pass + # check if all ignore exprs matched but didn't actually advance the parse location + if loc == last_loc: + break + last_loc = loc return loc - def preParse(self, instring, loc): + def preParse(self, instring: str, loc: int) -> int: if self.ignoreExprs: loc = self._skipIgnorables(instring, loc) @@ -830,7 +863,7 @@ class ParserElement(ABC): try: for fn in self.parseAction: try: - tokens = fn(instring, tokens_start, ret_tokens) + tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type] except IndexError as parse_action_exc: exc = ParseException("exception raised in parse action") raise exc from parse_action_exc @@ -853,7 +886,7 @@ class ParserElement(ABC): else: for fn in self.parseAction: try: - tokens = fn(instring, tokens_start, ret_tokens) + tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type] except IndexError as parse_action_exc: exc = ParseException("exception raised in parse action") raise exc from parse_action_exc @@ -875,17 +908,24 @@ class ParserElement(ABC): return loc, ret_tokens - def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int: + def try_parse( + self, + instring: str, + loc: int, + *, + raise_fatal: bool = False, + do_actions: bool = False, + ) -> int: try: - return self._parse(instring, loc, doActions=False)[0] + return self._parse(instring, loc, doActions=do_actions)[0] except ParseFatalException: if raise_fatal: raise raise ParseException(instring, loc, self.errmsg, self) - def can_parse_next(self, instring: str, loc: int) -> bool: + def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool: try: - self.try_parse(instring, loc) + self.try_parse(instring, loc, do_actions=do_actions) except (ParseException, IndexError): return False else: @@ -897,10 +937,23 @@ class ParserElement(ABC): Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] ] = {} + class _CacheType(dict): + """ + class to help type checking + """ + + not_in_cache: bool + + def get(self, *args): + ... + + def set(self, *args): + ... + # argument cache for optimizing repeated calls when backtracking through recursive expressions packrat_cache = ( - {} - ) # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail + _CacheType() + ) # set later by enable_packrat(); this is here so that reset_cache() doesn't fail packrat_cache_lock = RLock() packrat_cache_stats = [0, 0] @@ -930,24 +983,25 @@ class ParserElement(ABC): ParserElement.packrat_cache_stats[HIT] += 1 if self.debug and self.debugActions.debug_try: try: - self.debugActions.debug_try(instring, loc, self, cache_hit=True) + self.debugActions.debug_try(instring, loc, self, cache_hit=True) # type: ignore [call-arg] except TypeError: pass if isinstance(value, Exception): if self.debug and self.debugActions.debug_fail: try: self.debugActions.debug_fail( - instring, loc, self, value, cache_hit=True + instring, loc, self, value, cache_hit=True # type: ignore [call-arg] ) except TypeError: pass raise value + value = cast(Tuple[int, ParseResults, int], value) loc_, result, endloc = value[0], value[1].copy(), value[2] if self.debug and self.debugActions.debug_match: try: self.debugActions.debug_match( - instring, loc_, endloc, self, result, cache_hit=True + instring, loc_, endloc, self, result, cache_hit=True # type: ignore [call-arg] ) except TypeError: pass @@ -1009,7 +1063,7 @@ class ParserElement(ABC): Parameters: - - cache_size_limit - (default=``None``) - memoize at most this many + - ``cache_size_limit`` - (default=``None``) - memoize at most this many ``Forward`` elements during matching; if ``None`` (the default), memoize all ``Forward`` elements. @@ -1022,15 +1076,17 @@ class ParserElement(ABC): elif ParserElement._packratEnabled: raise RuntimeError("Packrat and Bounded Recursion are not compatible") if cache_size_limit is None: - ParserElement.recursion_memos = _UnboundedMemo() + ParserElement.recursion_memos = _UnboundedMemo() # type: ignore[assignment] elif cache_size_limit > 0: - ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) + ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) # type: ignore[assignment] else: raise NotImplementedError("Memo size of %s" % cache_size_limit) ParserElement._left_recursion_enabled = True @staticmethod - def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None: + def enable_packrat( + cache_size_limit: Union[int, None] = 128, *, force: bool = False + ) -> None: """ Enables "packrat" parsing, which adds memoizing to the parsing logic. Repeated parse attempts at the same string location (which happens @@ -1040,7 +1096,7 @@ class ParserElement(ABC): Parameters: - - cache_size_limit - (default= ``128``) - if an integer value is provided + - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided will limit the size of the packrat cache; if None is passed, then the cache size will be unbounded; if 0 is passed, the cache will be effectively disabled. @@ -1070,7 +1126,7 @@ class ParserElement(ABC): if cache_size_limit is None: ParserElement.packrat_cache = _UnboundedCache() else: - ParserElement.packrat_cache = _FifoCache(cache_size_limit) + ParserElement.packrat_cache = _FifoCache(cache_size_limit) # type: ignore[assignment] ParserElement._parse = ParserElement._parseCache def parse_string( @@ -1088,7 +1144,7 @@ class ParserElement(ABC): an object with attributes if the given parser includes results names. If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This - is also equivalent to ending the grammar with :class:`StringEnd`(). + is also equivalent to ending the grammar with :class:`StringEnd`\\ (). To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string @@ -1198,7 +1254,9 @@ class ParserElement(ABC): try: while loc <= instrlen and matches < maxMatches: try: - preloc = preparseFn(instring, loc) + preloc: int = preparseFn(instring, loc) + nextLoc: int + tokens: ParseResults nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) except ParseException: loc = preloc + 1 @@ -1352,7 +1410,7 @@ class ParserElement(ABC): def __add__(self, other) -> "ParserElement": """ Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` - converts them to :class:`Literal`s by default. + converts them to :class:`Literal`\\ s by default. Example:: @@ -1364,11 +1422,11 @@ class ParserElement(ABC): Hello, World! -> ['Hello', ',', 'World', '!'] - ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. + ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:: Literal('start') + ... + Literal('end') - is equivalent to: + is equivalent to:: Literal('start') + SkipTo('end')("_skipped*") + Literal('end') @@ -1382,11 +1440,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return And([self, other]) def __radd__(self, other) -> "ParserElement": @@ -1399,11 +1453,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return other + self def __sub__(self, other) -> "ParserElement": @@ -1413,11 +1463,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return self + And._ErrorStop() + other def __rsub__(self, other) -> "ParserElement": @@ -1427,11 +1473,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return other - self def __mul__(self, other) -> "ParserElement": @@ -1440,11 +1482,12 @@ class ParserElement(ABC): ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples may also include ``None`` as in: + - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent - to ``expr*n + ZeroOrMore(expr)`` - (read as "at least n instances of ``expr``") + to ``expr*n + ZeroOrMore(expr)`` + (read as "at least n instances of ``expr``") - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` - (read as "0 to n instances of ``expr``") + (read as "0 to n instances of ``expr``") - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` @@ -1477,17 +1520,9 @@ class ParserElement(ABC): minElements, optElements = other optElements -= minElements else: - raise TypeError( - "cannot multiply ParserElement and ({}) objects".format( - ",".join(type(item).__name__ for item in other) - ) - ) + return NotImplemented else: - raise TypeError( - "cannot multiply ParserElement and {} objects".format( - type(other).__name__ - ) - ) + return NotImplemented if minElements < 0: raise ValueError("cannot multiply ParserElement by negative value") @@ -1531,13 +1566,12 @@ class ParserElement(ABC): return _PendingSkip(self, must_skip=True) if isinstance(other, str_type): + # `expr | ""` is equivalent to `Opt(expr)` + if other == "": + return Opt(self) other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return MatchFirst([self, other]) def __ror__(self, other) -> "ParserElement": @@ -1547,11 +1581,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return other | self def __xor__(self, other) -> "ParserElement": @@ -1561,11 +1591,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return Or([self, other]) def __rxor__(self, other) -> "ParserElement": @@ -1575,11 +1601,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return other ^ self def __and__(self, other) -> "ParserElement": @@ -1589,11 +1611,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return Each([self, other]) def __rand__(self, other) -> "ParserElement": @@ -1603,11 +1621,7 @@ class ParserElement(ABC): if isinstance(other, str_type): other = self._literalStringClass(other) if not isinstance(other, ParserElement): - raise TypeError( - "Cannot combine element of type {} with ParserElement".format( - type(other).__name__ - ) - ) + return NotImplemented return other & self def __invert__(self) -> "ParserElement": @@ -1636,38 +1650,58 @@ class ParserElement(ABC): ``None`` may be used in place of ``...``. - Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception - if more than ``n`` ``expr``s exist in the input stream. If this behavior is + Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception + if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is desired, then write ``expr[..., n] + ~expr``. + + For repetition with a stop_on expression, use slice notation: + + - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)`` + - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)`` + """ + stop_on_defined = False + stop_on = NoMatch() + if isinstance(key, slice): + key, stop_on = key.start, key.stop + if key is None: + key = ... + stop_on_defined = True + elif isinstance(key, tuple) and isinstance(key[-1], slice): + key, stop_on = (key[0], key[1].start), key[1].stop + stop_on_defined = True + # convert single arg keys to tuples + if isinstance(key, str_type): + key = (key,) try: - if isinstance(key, str_type): - key = (key,) iter(key) except TypeError: key = (key, key) if len(key) > 2: raise TypeError( - "only 1 or 2 index arguments supported ({}{})".format( - key[:5], "... [{}]".format(len(key)) if len(key) > 5 else "" - ) + f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})" ) # clip to 2 elements ret = self * tuple(key[:2]) + ret = typing.cast(_MultipleMatch, ret) + + if stop_on_defined: + ret.stopOn(stop_on) + return ret - def __call__(self, name: str = None) -> "ParserElement": + def __call__(self, name: typing.Optional[str] = None) -> "ParserElement": """ Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be passed as ``True``. - If ``name` is omitted, same as calling :class:`copy`. + If ``name`` is omitted, same as calling :class:`copy`. Example:: @@ -1775,17 +1809,18 @@ class ParserElement(ABC): should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` """ self.debugActions = self.DebugActions( - start_action or _default_start_debug_action, - success_action or _default_success_debug_action, - exception_action or _default_exception_debug_action, + start_action or _default_start_debug_action, # type: ignore[truthy-function] + success_action or _default_success_debug_action, # type: ignore[truthy-function] + exception_action or _default_exception_debug_action, # type: ignore[truthy-function] ) self.debug = True return self - def set_debug(self, flag: bool = True) -> "ParserElement": + def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement": """ Enable display of debugging messages while doing pattern matching. Set ``flag`` to ``True`` to enable, ``False`` to disable. + Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions. Example:: @@ -1819,6 +1854,11 @@ class ParserElement(ABC): which makes debugging and exception messages easier to understand - for instance, the default name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. """ + if recurse: + for expr in self.visit_all(): + expr.set_debug(flag, recurse=False) + return self + if flag: self.set_debug_actions( _default_start_debug_action, @@ -1836,7 +1876,7 @@ class ParserElement(ABC): return self._defaultName @abstractmethod - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: """ Child classes must define this method, which defines how the ``default_name`` is set. """ @@ -1844,7 +1884,9 @@ class ParserElement(ABC): def set_name(self, name: str) -> "ParserElement": """ Define name for this expression, makes debugging and exception messages clearer. + Example:: + Word(nums).parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) Word(nums).set_name("integer").parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) """ @@ -1870,7 +1912,7 @@ class ParserElement(ABC): self._defaultName = None return self - def recurse(self) -> Sequence["ParserElement"]: + def recurse(self) -> List["ParserElement"]: return [] def _checkRecursion(self, parseElementList): @@ -1882,6 +1924,11 @@ class ParserElement(ABC): """ Check defined expressions for valid structure, check for infinite recursive definitions. """ + warnings.warn( + "ParserElement.validate() is deprecated, and should not be used to check for left recursion", + DeprecationWarning, + stacklevel=2, + ) self._checkRecursion([]) def parse_file( @@ -1899,8 +1946,10 @@ class ParserElement(ABC): """ parseAll = parseAll or parse_all try: + file_or_filename = typing.cast(TextIO, file_or_filename) file_contents = file_or_filename.read() except AttributeError: + file_or_filename = typing.cast(str, file_or_filename) with open(file_or_filename, "r", encoding=encoding) as f: file_contents = f.read() try: @@ -1932,6 +1981,7 @@ class ParserElement(ABC): inline microtests of sub expressions while building up larger parser. Parameters: + - ``test_string`` - to test against this expression for a match - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests @@ -1955,7 +2005,7 @@ class ParserElement(ABC): full_dump: bool = True, print_results: bool = True, failure_tests: bool = False, - post_parse: Callable[[str, ParseResults], str] = None, + post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None, file: typing.Optional[TextIO] = None, with_line_numbers: bool = False, *, @@ -1963,7 +2013,7 @@ class ParserElement(ABC): fullDump: bool = True, printResults: bool = True, failureTests: bool = False, - postParse: Callable[[str, ParseResults], str] = None, + postParse: typing.Optional[Callable[[str, ParseResults], str]] = None, ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]: """ Execute the parse expression on a series of test strings, showing each @@ -1971,6 +2021,7 @@ class ParserElement(ABC): run a parse expression against a list of sample strings. Parameters: + - ``tests`` - a list of separate test strings, or a multiline string of test strings - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test @@ -2067,22 +2118,27 @@ class ParserElement(ABC): failureTests = failureTests or failure_tests postParse = postParse or post_parse if isinstance(tests, str_type): + tests = typing.cast(str, tests) line_strip = type(tests).strip tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()] - if isinstance(comment, str_type): - comment = Literal(comment) + comment_specified = comment is not None + if comment_specified: + if isinstance(comment, str_type): + comment = typing.cast(str, comment) + comment = Literal(comment) + comment = typing.cast(ParserElement, comment) if file is None: file = sys.stdout print_ = file.write result: Union[ParseResults, Exception] - allResults = [] - comments = [] + allResults: List[Tuple[str, Union[ParseResults, Exception]]] = [] + comments: List[str] = [] success = True NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) BOM = "\ufeff" for t in tests: - if comment is not None and comment.matches(t, False) or comments and not t: + if comment_specified and comment.matches(t, False) or comments and not t: comments.append( pyparsing_test.with_line_numbers(t) if with_line_numbers else t ) @@ -2107,7 +2163,7 @@ class ParserElement(ABC): success = success and failureTests result = pe except Exception as exc: - out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc)) + out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}") if ParserElement.verbose_stacktrace: out.extend(traceback.format_tb(exc.__traceback__)) success = success and failureTests @@ -2127,9 +2183,7 @@ class ParserElement(ABC): except Exception as e: out.append(result.dump(full=fullDump)) out.append( - "{} failed: {}: {}".format( - postParse.__name__, type(e).__name__, e - ) + f"{postParse.__name__} failed: {type(e).__name__}: {e}" ) else: out.append(result.dump(full=fullDump)) @@ -2148,19 +2202,28 @@ class ParserElement(ABC): vertical: int = 3, show_results_names: bool = False, show_groups: bool = False, + embed: bool = False, **kwargs, ) -> None: """ Create a railroad diagram for the parser. Parameters: - - output_html (str or file-like object) - output target for generated + + - ``output_html`` (str or file-like object) - output target for generated diagram HTML - - vertical (int) - threshold for formatting multiple alternatives vertically + - ``vertical`` (int) - threshold for formatting multiple alternatives vertically instead of horizontally (default=3) - - show_results_names - bool flag whether diagram should show annotations for + - ``show_results_names`` - bool flag whether diagram should show annotations for defined results names - - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box + - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box + - ``embed`` - bool flag whether generated HTML should omit , , and tags to embed + the resulting HTML in an enclosing HTML source + - ``head`` - str containing additional HTML to insert into the section of the generated code; + can be used to insert custom CSS styling + - ``body`` - str containing additional HTML to insert at the beginning of the section of the + generated code + Additional diagram-formatting keyword arguments can also be included; see railroad.Diagram class. """ @@ -2183,38 +2246,93 @@ class ParserElement(ABC): ) if isinstance(output_html, (str, Path)): with open(output_html, "w", encoding="utf-8") as diag_file: - diag_file.write(railroad_to_html(railroad)) + diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs)) else: # we were passed a file-like object, just write to it - output_html.write(railroad_to_html(railroad)) + output_html.write(railroad_to_html(railroad, embed=embed, **kwargs)) + + # Compatibility synonyms + # fmt: off + @staticmethod + @replaced_by_pep8(inline_literals_using) + def inlineLiteralsUsing(): ... + + @staticmethod + @replaced_by_pep8(set_default_whitespace_chars) + def setDefaultWhitespaceChars(): ... + + @replaced_by_pep8(set_results_name) + def setResultsName(self): ... + + @replaced_by_pep8(set_break) + def setBreak(self): ... + + @replaced_by_pep8(set_parse_action) + def setParseAction(self): ... + + @replaced_by_pep8(add_parse_action) + def addParseAction(self): ... + + @replaced_by_pep8(add_condition) + def addCondition(self): ... + + @replaced_by_pep8(set_fail_action) + def setFailAction(self): ... + + @replaced_by_pep8(try_parse) + def tryParse(self): ... + + @staticmethod + @replaced_by_pep8(enable_left_recursion) + def enableLeftRecursion(): ... + + @staticmethod + @replaced_by_pep8(enable_packrat) + def enablePackrat(): ... + + @replaced_by_pep8(parse_string) + def parseString(self): ... + + @replaced_by_pep8(scan_string) + def scanString(self): ... + + @replaced_by_pep8(transform_string) + def transformString(self): ... + + @replaced_by_pep8(search_string) + def searchString(self): ... + + @replaced_by_pep8(ignore_whitespace) + def ignoreWhitespace(self): ... + + @replaced_by_pep8(leave_whitespace) + def leaveWhitespace(self): ... + + @replaced_by_pep8(set_whitespace_chars) + def setWhitespaceChars(self): ... + + @replaced_by_pep8(parse_with_tabs) + def parseWithTabs(self): ... + + @replaced_by_pep8(set_debug_actions) + def setDebugActions(self): ... + + @replaced_by_pep8(set_debug) + def setDebug(self): ... + + @replaced_by_pep8(set_name) + def setName(self): ... + + @replaced_by_pep8(parse_file) + def parseFile(self): ... + + @replaced_by_pep8(run_tests) + def runTests(self): ... - setDefaultWhitespaceChars = set_default_whitespace_chars - inlineLiteralsUsing = inline_literals_using - setResultsName = set_results_name - setBreak = set_break - setParseAction = set_parse_action - addParseAction = add_parse_action - addCondition = add_condition - setFailAction = set_fail_action - tryParse = try_parse canParseNext = can_parse_next resetCache = reset_cache - enableLeftRecursion = enable_left_recursion - enablePackrat = enable_packrat - parseString = parse_string - scanString = scan_string - searchString = search_string - transformString = transform_string - setWhitespaceChars = set_whitespace_chars - parseWithTabs = parse_with_tabs - setDebugActions = set_debug_actions - setDebug = set_debug defaultName = default_name - setName = set_name - parseFile = parse_file - runTests = run_tests - ignoreWhitespace = ignore_whitespace - leaveWhitespace = leave_whitespace + # fmt: on class _PendingSkip(ParserElement): @@ -2225,7 +2343,7 @@ class _PendingSkip(ParserElement): self.anchor = expr self.must_skip = must_skip - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return str(self.anchor + Empty()).replace("Empty", "...") def __add__(self, other) -> "ParserElement": @@ -2266,21 +2384,10 @@ class Token(ParserElement): def __init__(self): super().__init__(savelist=False) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return type(self).__name__ -class Empty(Token): - """ - An empty token, will always match. - """ - - def __init__(self): - super().__init__() - self.mayReturnEmpty = True - self.mayIndexError = False - - class NoMatch(Token): """ A token that will never match. @@ -2312,25 +2419,33 @@ class Literal(Token): use :class:`Keyword` or :class:`CaselessKeyword`. """ + def __new__(cls, match_string: str = "", *, matchString: str = ""): + # Performance tuning: select a subclass with optimized parseImpl + if cls is Literal: + match_string = matchString or match_string + if not match_string: + return super().__new__(Empty) + if len(match_string) == 1: + return super().__new__(_SingleCharLiteral) + + # Default behavior + return super().__new__(cls) + + # Needed to make copy.copy() work correctly if we customize __new__ + def __getnewargs__(self): + return (self.match,) + def __init__(self, match_string: str = "", *, matchString: str = ""): super().__init__() match_string = matchString or match_string self.match = match_string self.matchLen = len(match_string) - try: - self.firstMatchChar = match_string[0] - except IndexError: - raise ValueError("null string passed to Literal; use Empty() instead") + self.firstMatchChar = match_string[:1] self.errmsg = "Expected " + self.name self.mayReturnEmpty = False self.mayIndexError = False - # Performance tuning: modify __class__ to select - # a parseImpl optimized for single-character check - if self.matchLen == 1 and type(self) is Literal: - self.__class__ = _SingleCharLiteral - - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return repr(self.match) def parseImpl(self, instring, loc, doActions=True): @@ -2341,6 +2456,23 @@ class Literal(Token): raise ParseException(instring, loc, self.errmsg, self) +class Empty(Literal): + """ + An empty token, will always match. + """ + + def __init__(self, match_string="", *, matchString=""): + super().__init__("") + self.mayReturnEmpty = True + self.mayIndexError = False + + def _generateDefaultName(self) -> str: + return "Empty" + + def parseImpl(self, instring, loc, doActions=True): + return loc, [] + + class _SingleCharLiteral(Literal): def parseImpl(self, instring, loc, doActions=True): if instring[loc] == self.firstMatchChar: @@ -2354,8 +2486,8 @@ ParserElement._literalStringClass = Literal class Keyword(Token): """ Token to exactly match a specified string as a keyword, that is, - it must be immediately followed by a non-keyword character. Compare - with :class:`Literal`: + it must be immediately preceded and followed by whitespace or + non-keyword characters. Compare with :class:`Literal`: - ``Literal("if")`` will match the leading ``'if'`` in ``'ifAndOnlyIf'``. @@ -2365,7 +2497,7 @@ class Keyword(Token): Accepts two optional constructor arguments in addition to the keyword string: - - ``identChars`` is a string of characters that would be valid + - ``ident_chars`` is a string of characters that would be valid identifier characters, defaulting to all alphanumerics + "_" and "$" - ``caseless`` allows case-insensitive matching, default is ``False``. @@ -2400,7 +2532,7 @@ class Keyword(Token): self.firstMatchChar = match_string[0] except IndexError: raise ValueError("null string passed to Keyword; use Empty() instead") - self.errmsg = "Expected {} {}".format(type(self).__name__, self.name) + self.errmsg = f"Expected {type(self).__name__} {self.name}" self.mayReturnEmpty = False self.mayIndexError = False self.caseless = caseless @@ -2409,7 +2541,7 @@ class Keyword(Token): identChars = identChars.upper() self.identChars = set(identChars) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return repr(self.match) def parseImpl(self, instring, loc, doActions=True): @@ -2559,7 +2691,7 @@ class CloseMatch(Token): def __init__( self, match_string: str, - max_mismatches: int = None, + max_mismatches: typing.Optional[int] = None, *, maxMismatches: int = 1, caseless=False, @@ -2568,15 +2700,13 @@ class CloseMatch(Token): super().__init__() self.match_string = match_string self.maxMismatches = maxMismatches - self.errmsg = "Expected {!r} (with up to {} mismatches)".format( - self.match_string, self.maxMismatches - ) + self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)" self.caseless = caseless self.mayIndexError = False self.mayReturnEmpty = False - def _generateDefaultName(self): - return "{}:{!r}".format(type(self).__name__, self.match_string) + def _generateDefaultName(self) -> str: + return f"{type(self).__name__}:{self.match_string!r}" def parseImpl(self, instring, loc, doActions=True): start = loc @@ -2612,7 +2742,9 @@ class CloseMatch(Token): class Word(Token): """Token for matching words composed of allowed character sets. + Parameters: + - ``init_chars`` - string of all characters that should be used to match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; if ``body_chars`` is also specified, then this is the string of @@ -2697,26 +2829,24 @@ class Word(Token): super().__init__() if not initChars: raise ValueError( - "invalid {}, initChars cannot be empty string".format( - type(self).__name__ - ) + f"invalid {type(self).__name__}, initChars cannot be empty string" ) - initChars = set(initChars) - self.initChars = initChars + initChars_set = set(initChars) if excludeChars: - excludeChars = set(excludeChars) - initChars -= excludeChars + excludeChars_set = set(excludeChars) + initChars_set -= excludeChars_set if bodyChars: - bodyChars = set(bodyChars) - excludeChars - self.initCharsOrig = "".join(sorted(initChars)) + bodyChars = "".join(set(bodyChars) - excludeChars_set) + self.initChars = initChars_set + self.initCharsOrig = "".join(sorted(initChars_set)) if bodyChars: - self.bodyCharsOrig = "".join(sorted(bodyChars)) self.bodyChars = set(bodyChars) + self.bodyCharsOrig = "".join(sorted(bodyChars)) else: - self.bodyCharsOrig = "".join(sorted(initChars)) - self.bodyChars = set(initChars) + self.bodyChars = initChars_set + self.bodyCharsOrig = self.initCharsOrig self.maxSpecified = max > 0 @@ -2725,6 +2855,11 @@ class Word(Token): "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" ) + if self.maxSpecified and min > max: + raise ValueError( + f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})" + ) + self.minLen = min if max > 0: @@ -2733,62 +2868,64 @@ class Word(Token): self.maxLen = _MAX_INT if exact > 0: + min = max = exact self.maxLen = exact self.minLen = exact self.errmsg = "Expected " + self.name self.mayIndexError = False self.asKeyword = asKeyword + if self.asKeyword: + self.errmsg += " as a keyword" # see if we can make a regex for this Word - if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0): + if " " not in (self.initChars | self.bodyChars): + if len(self.initChars) == 1: + re_leading_fragment = re.escape(self.initCharsOrig) + else: + re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]" + if self.bodyChars == self.initChars: - if max == 0: + if max == 0 and self.minLen == 1: repeat = "+" elif max == 1: repeat = "" else: - repeat = "{{{},{}}}".format( - self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen - ) - self.reString = "[{}]{}".format( - _collapse_string_to_ranges(self.initChars), - repeat, - ) - elif len(self.initChars) == 1: - if max == 0: - repeat = "*" - else: - repeat = "{{0,{}}}".format(max - 1) - self.reString = "{}[{}]{}".format( - re.escape(self.initCharsOrig), - _collapse_string_to_ranges(self.bodyChars), - repeat, - ) + if self.minLen != self.maxLen: + repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}" + else: + repeat = f"{{{self.minLen}}}" + self.reString = f"{re_leading_fragment}{repeat}" else: - if max == 0: - repeat = "*" - elif max == 2: + if max == 1: + re_body_fragment = "" repeat = "" else: - repeat = "{{0,{}}}".format(max - 1) - self.reString = "[{}][{}]{}".format( - _collapse_string_to_ranges(self.initChars), - _collapse_string_to_ranges(self.bodyChars), - repeat, - ) + re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]" + if max == 0 and self.minLen == 1: + repeat = "*" + elif max == 2: + repeat = "?" if min <= 1 else "" + else: + if min != max: + repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}" + else: + repeat = f"{{{min - 1 if min > 0 else ''}}}" + + self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}" + if self.asKeyword: - self.reString = r"\b" + self.reString + r"\b" + self.reString = rf"\b{self.reString}\b" try: self.re = re.compile(self.reString) except re.error: - self.re = None + self.re = None # type: ignore[assignment] else: self.re_match = self.re.match - self.__class__ = _WordRegex + self.parseImpl = self.parseImpl_regex # type: ignore[assignment] - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: def charsAsStr(s): max_repr_len = 16 s = _collapse_string_to_ranges(s, re_escape=False) @@ -2798,11 +2935,9 @@ class Word(Token): return s if self.initChars != self.bodyChars: - base = "W:({}, {})".format( - charsAsStr(self.initChars), charsAsStr(self.bodyChars) - ) + base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})" else: - base = "W:({})".format(charsAsStr(self.initChars)) + base = f"W:({charsAsStr(self.initChars)})" # add length specification if self.minLen > 1 or self.maxLen != _MAX_INT: @@ -2810,11 +2945,11 @@ class Word(Token): if self.minLen == 1: return base[2:] else: - return base + "{{{}}}".format(self.minLen) + return base + f"{{{self.minLen}}}" elif self.maxLen == _MAX_INT: - return base + "{{{},...}}".format(self.minLen) + return base + f"{{{self.minLen},...}}" else: - return base + "{{{},{}}}".format(self.minLen, self.maxLen) + return base + f"{{{self.minLen},{self.maxLen}}}" return base def parseImpl(self, instring, loc, doActions=True): @@ -2849,9 +2984,7 @@ class Word(Token): return loc, instring[start:loc] - -class _WordRegex(Word): - def parseImpl(self, instring, loc, doActions=True): + def parseImpl_regex(self, instring, loc, doActions=True): result = self.re_match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -2860,7 +2993,7 @@ class _WordRegex(Word): return loc, result.group() -class Char(_WordRegex): +class Char(Word): """A short-cut class for defining :class:`Word` ``(characters, exact=1)``, when defining a match of any single character in a string of characters. @@ -2878,13 +3011,8 @@ class Char(_WordRegex): asKeyword = asKeyword or as_keyword excludeChars = excludeChars or exclude_chars super().__init__( - charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars + charset, exact=1, as_keyword=asKeyword, exclude_chars=excludeChars ) - self.reString = "[{}]".format(_collapse_string_to_ranges(self.initChars)) - if asKeyword: - self.reString = r"\b{}\b".format(self.reString) - self.re = re.compile(self.reString) - self.re_match = self.re.match class Regex(Token): @@ -2954,9 +3082,9 @@ class Regex(Token): self.asGroupList = asGroupList self.asMatch = asMatch if self.asGroupList: - self.parseImpl = self.parseImplAsGroupList + self.parseImpl = self.parseImplAsGroupList # type: ignore [assignment] if self.asMatch: - self.parseImpl = self.parseImplAsMatch + self.parseImpl = self.parseImplAsMatch # type: ignore [assignment] @cached_property def re(self): @@ -2966,9 +3094,7 @@ class Regex(Token): try: return re.compile(self.pattern, self.flags) except re.error: - raise ValueError( - "invalid pattern ({!r}) passed to Regex".format(self.pattern) - ) + raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex") @cached_property def re_match(self): @@ -2978,7 +3104,7 @@ class Regex(Token): def mayReturnEmpty(self): return self.re_match("") is not None - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) def parseImpl(self, instring, loc, doActions=True): @@ -3024,10 +3150,12 @@ class Regex(Token): # prints "

main title

" """ if self.asGroupList: - raise TypeError("cannot use sub() with Regex(asGroupList=True)") + raise TypeError("cannot use sub() with Regex(as_group_list=True)") if self.asMatch and callable(repl): - raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)") + raise TypeError( + "cannot use sub() with a callable with Regex(as_match=True)" + ) if self.asMatch: @@ -3081,7 +3209,7 @@ class QuotedString(Token): [['This is the "quote"']] [['This is the quote with "embedded" quotes']] """ - ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")) + ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))) def __init__( self, @@ -3101,139 +3229,164 @@ class QuotedString(Token): convertWhitespaceEscapes: bool = True, ): super().__init__() - escChar = escChar or esc_char - escQuote = escQuote or esc_quote - unquoteResults = unquoteResults and unquote_results - endQuoteChar = endQuoteChar or end_quote_char - convertWhitespaceEscapes = ( + esc_char = escChar or esc_char + esc_quote = escQuote or esc_quote + unquote_results = unquoteResults and unquote_results + end_quote_char = endQuoteChar or end_quote_char + convert_whitespace_escapes = ( convertWhitespaceEscapes and convert_whitespace_escapes ) quote_char = quoteChar or quote_char - # remove white space from quote chars - wont work anyway + # remove white space from quote chars quote_char = quote_char.strip() if not quote_char: raise ValueError("quote_char cannot be the empty string") - if endQuoteChar is None: - endQuoteChar = quote_char + if end_quote_char is None: + end_quote_char = quote_char else: - endQuoteChar = endQuoteChar.strip() - if not endQuoteChar: - raise ValueError("endQuoteChar cannot be the empty string") + end_quote_char = end_quote_char.strip() + if not end_quote_char: + raise ValueError("end_quote_char cannot be the empty string") - self.quoteChar = quote_char - self.quoteCharLen = len(quote_char) - self.firstQuoteChar = quote_char[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - self.convertWhitespaceEscapes = convertWhitespaceEscapes + self.quote_char: str = quote_char + self.quote_char_len: int = len(quote_char) + self.first_quote_char: str = quote_char[0] + self.end_quote_char: str = end_quote_char + self.end_quote_char_len: int = len(end_quote_char) + self.esc_char: str = esc_char or "" + self.has_esc_char: bool = esc_char is not None + self.esc_quote: str = esc_quote or "" + self.unquote_results: bool = unquote_results + self.convert_whitespace_escapes: bool = convert_whitespace_escapes + self.multiline = multiline + self.re_flags = re.RegexFlag(0) - sep = "" - inner_pattern = "" + # fmt: off + # build up re pattern for the content between the quote delimiters + inner_pattern = [] - if escQuote: - inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote)) - sep = "|" + if esc_quote: + inner_pattern.append(rf"(?:{re.escape(esc_quote)})") - if escChar: - inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar)) - sep = "|" - self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + if esc_char: + inner_pattern.append(rf"(?:{re.escape(esc_char)}.)") - if len(self.endQuoteChar) > 1: - inner_pattern += ( - "{}(?:".format(sep) + if len(self.end_quote_char) > 1: + inner_pattern.append( + "(?:" + "|".join( - "(?:{}(?!{}))".format( - re.escape(self.endQuoteChar[:i]), - re.escape(self.endQuoteChar[i:]), - ) - for i in range(len(self.endQuoteChar) - 1, 0, -1) + f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))" + for i in range(len(self.end_quote_char) - 1, 0, -1) ) + ")" ) - sep = "|" - if multiline: - self.flags = re.MULTILINE | re.DOTALL - inner_pattern += r"{}(?:[^{}{}])".format( - sep, - _escape_regex_range_chars(self.endQuoteChar[0]), - (_escape_regex_range_chars(escChar) if escChar is not None else ""), + if self.multiline: + self.re_flags |= re.MULTILINE | re.DOTALL + inner_pattern.append( + rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}" + rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])" ) else: - self.flags = 0 - inner_pattern += r"{}(?:[^{}\n\r{}])".format( - sep, - _escape_regex_range_chars(self.endQuoteChar[0]), - (_escape_regex_range_chars(escChar) if escChar is not None else ""), + inner_pattern.append( + rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r" + rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])" ) self.pattern = "".join( [ - re.escape(self.quoteChar), + re.escape(self.quote_char), "(?:", - inner_pattern, + '|'.join(inner_pattern), ")*", - re.escape(self.endQuoteChar), + re.escape(self.end_quote_char), ] ) + if self.unquote_results: + if self.convert_whitespace_escapes: + self.unquote_scan_re = re.compile( + rf"({'|'.join(re.escape(k) for k in self.ws_map)})" + rf"|({re.escape(self.esc_char)}.)" + rf"|(\n|.)", + flags=self.re_flags, + ) + else: + self.unquote_scan_re = re.compile( + rf"({re.escape(self.esc_char)}.)" + rf"|(\n|.)", + flags=self.re_flags + ) + # fmt: on + try: - self.re = re.compile(self.pattern, self.flags) + self.re = re.compile(self.pattern, self.re_flags) self.reString = self.pattern self.re_match = self.re.match except re.error: - raise ValueError( - "invalid pattern {!r} passed to Regex".format(self.pattern) - ) + raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex") self.errmsg = "Expected " + self.name self.mayIndexError = False self.mayReturnEmpty = True - def _generateDefaultName(self): - if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): - return "string enclosed in {!r}".format(self.quoteChar) + def _generateDefaultName(self) -> str: + if self.quote_char == self.end_quote_char and isinstance( + self.quote_char, str_type + ): + return f"string enclosed in {self.quote_char!r}" - return "quoted string, starting with {} ending with {}".format( - self.quoteChar, self.endQuoteChar - ) + return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}" def parseImpl(self, instring, loc, doActions=True): + # check first character of opening quote to see if that is a match + # before doing the more complicated regex match result = ( - instring[loc] == self.firstQuoteChar + instring[loc] == self.first_quote_char and self.re_match(instring, loc) or None ) if not result: raise ParseException(instring, loc, self.errmsg, self) + # get ending loc and matched string from regex matching result loc = result.end() ret = result.group() - if self.unquoteResults: - + if self.unquote_results: # strip off quotes - ret = ret[self.quoteCharLen : -self.endQuoteCharLen] + ret = ret[self.quote_char_len : -self.end_quote_char_len] if isinstance(ret, str_type): - # replace escaped whitespace - if "\\" in ret and self.convertWhitespaceEscapes: - for wslit, wschar in self.ws_map: - ret = ret.replace(wslit, wschar) - - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) + # fmt: off + if self.convert_whitespace_escapes: + # as we iterate over matches in the input string, + # collect from whichever match group of the unquote_scan_re + # regex matches (only 1 group will match at any given time) + ret = "".join( + # match group 1 matches \t, \n, etc. + self.ws_map[match.group(1)] if match.group(1) + # match group 2 matches escaped characters + else match.group(2)[-1] if match.group(2) + # match group 3 matches any character + else match.group(3) + for match in self.unquote_scan_re.finditer(ret) + ) + else: + ret = "".join( + # match group 1 matches escaped characters + match.group(1)[-1] if match.group(1) + # match group 2 matches any character + else match.group(2) + for match in self.unquote_scan_re.finditer(ret) + ) + # fmt: on # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) + if self.esc_quote: + ret = ret.replace(self.esc_quote, self.end_quote_char) return loc, ret @@ -3252,7 +3405,7 @@ class CharsNotIn(Token): # define a comma-separated-value as anything that is not a ',' csv_value = CharsNotIn(',') - print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) + print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) prints:: @@ -3294,12 +3447,12 @@ class CharsNotIn(Token): self.mayReturnEmpty = self.minLen == 0 self.mayIndexError = False - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: not_chars_str = _collapse_string_to_ranges(self.notChars) if len(not_chars_str) > 16: - return "!W:({}...)".format(self.notChars[: 16 - 3]) + return f"!W:({self.notChars[: 16 - 3]}...)" else: - return "!W:({})".format(self.notChars) + return f"!W:({self.notChars})" def parseImpl(self, instring, loc, doActions=True): notchars = self.notCharsSet @@ -3376,7 +3529,7 @@ class White(Token): self.maxLen = exact self.minLen = exact - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "".join(White.whiteStrs[c] for c in self.matchWhite) def parseImpl(self, instring, loc, doActions=True): @@ -3411,7 +3564,7 @@ class GoToColumn(PositionToken): super().__init__() self.col = colno - def preParse(self, instring, loc): + def preParse(self, instring: str, loc: int) -> int: if col(loc, instring) != self.col: instrlen = len(instring) if self.ignoreExprs: @@ -3446,7 +3599,7 @@ class LineStart(PositionToken): B AAA and definitely not this one ''' - for t in (LineStart() + 'AAA' + restOfLine).search_string(test): + for t in (LineStart() + 'AAA' + rest_of_line).search_string(test): print(t) prints:: @@ -3464,7 +3617,7 @@ class LineStart(PositionToken): self.skipper = Empty().set_whitespace_chars(self.whiteChars) self.errmsg = "Expected start of line" - def preParse(self, instring, loc): + def preParse(self, instring: str, loc: int) -> int: if loc == 0: return loc else: @@ -3624,7 +3777,7 @@ class ParseExpression(ParserElement): self.exprs = [exprs] self.callPreparse = False - def recurse(self) -> Sequence[ParserElement]: + def recurse(self) -> List[ParserElement]: return self.exprs[:] def append(self, other) -> ParserElement: @@ -3669,8 +3822,8 @@ class ParseExpression(ParserElement): e.ignore(self.ignoreExprs[-1]) return self - def _generateDefaultName(self): - return "{}:({})".format(self.__class__.__name__, str(self.exprs)) + def _generateDefaultName(self) -> str: + return f"{self.__class__.__name__}:({str(self.exprs)})" def streamline(self) -> ParserElement: if self.streamlined: @@ -3714,6 +3867,11 @@ class ParseExpression(ParserElement): return self def validate(self, validateTrace=None) -> None: + warnings.warn( + "ParserElement.validate() is deprecated, and should not be used to check for left recursion", + DeprecationWarning, + stacklevel=2, + ) tmp = (validateTrace if validateTrace is not None else [])[:] + [self] for e in self.exprs: e.validate(tmp) @@ -3721,6 +3879,7 @@ class ParseExpression(ParserElement): def copy(self) -> ParserElement: ret = super().copy() + ret = typing.cast(ParseExpression, ret) ret.exprs = [e.copy() for e in self.exprs] return ret @@ -3750,8 +3909,14 @@ class ParseExpression(ParserElement): return super()._setResultsName(name, listAllMatches) - ignoreWhitespace = ignore_whitespace - leaveWhitespace = leave_whitespace + # Compatibility synonyms + # fmt: off + @replaced_by_pep8(leave_whitespace) + def leaveWhitespace(self): ... + + @replaced_by_pep8(ignore_whitespace) + def ignoreWhitespace(self): ... + # fmt: on class And(ParseExpression): @@ -3777,7 +3942,7 @@ class And(ParseExpression): super().__init__(*args, **kwargs) self.leave_whitespace() - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "-" def __init__( @@ -3789,7 +3954,9 @@ class And(ParseExpression): for i, expr in enumerate(exprs): if expr is Ellipsis: if i < len(exprs) - 1: - skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1] + skipto_arg: ParserElement = typing.cast( + ParseExpression, (Empty() + exprs[i + 1]) + ).exprs[-1] tmp.append(SkipTo(skipto_arg)("_skipped*")) else: raise Exception( @@ -3822,8 +3989,9 @@ class And(ParseExpression): and isinstance(e.exprs[-1], _PendingSkip) for e in self.exprs[:-1] ): + deleted_expr_marker = NoMatch() for i, e in enumerate(self.exprs[:-1]): - if e is None: + if e is deleted_expr_marker: continue if ( isinstance(e, ParseExpression) @@ -3831,17 +3999,19 @@ class And(ParseExpression): and isinstance(e.exprs[-1], _PendingSkip) ): e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] - self.exprs[i + 1] = None - self.exprs = [e for e in self.exprs if e is not None] + self.exprs[i + 1] = deleted_expr_marker + self.exprs = [e for e in self.exprs if e is not deleted_expr_marker] super().streamline() # link any IndentedBlocks to the prior expression + prev: ParserElement + cur: ParserElement for prev, cur in zip(self.exprs, self.exprs[1:]): # traverse cur or any first embedded expr of cur looking for an IndentedBlock # (but watch out for recursive grammar) seen = set() - while cur: + while True: if id(cur) in seen: break seen.add(id(cur)) @@ -3853,7 +4023,10 @@ class And(ParseExpression): ) break subs = cur.recurse() - cur = next(iter(subs), None) + next_first = next(iter(subs), None) + if next_first is None: + break + cur = typing.cast(ParserElement, next_first) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) return self @@ -3884,13 +4057,14 @@ class And(ParseExpression): ) else: loc, exprtokens = e._parse(instring, loc, doActions) - if exprtokens or exprtokens.haskeys(): - resultlist += exprtokens + resultlist += exprtokens return loc, resultlist def __iadd__(self, other): if isinstance(other, str_type): other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + return NotImplemented return self.append(other) # And([self, other]) def _checkRecursion(self, parseElementList): @@ -3900,7 +4074,7 @@ class And(ParseExpression): if not e.mayReturnEmpty: break - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: inner = " ".join(str(e) for e in self.exprs) # strip off redundant inner {}'s while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": @@ -3958,7 +4132,7 @@ class Or(ParseExpression): loc2 = e.try_parse(instring, loc, raise_fatal=True) except ParseFatalException as pfe: pfe.__traceback__ = None - pfe.parserElement = e + pfe.parser_element = e fatals.append(pfe) maxException = None maxExcLoc = -1 @@ -4016,12 +4190,15 @@ class Or(ParseExpression): if len(fatals) > 1: fatals.sort(key=lambda e: -e.loc) if fatals[0].loc == fatals[1].loc: - fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element)))) max_fatal = fatals[0] raise max_fatal if maxException is not None: - maxException.msg = self.errmsg + # infer from this check that all alternatives failed at the current position + # so emit this collective error message instead of any single error message + if maxExcLoc == loc: + maxException.msg = self.errmsg raise maxException else: raise ParseException( @@ -4031,9 +4208,11 @@ class Or(ParseExpression): def __ixor__(self, other): if isinstance(other, str_type): other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + return NotImplemented return self.append(other) # Or([self, other]) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" def _setResultsName(self, name, listAllMatches=False): @@ -4118,7 +4297,7 @@ class MatchFirst(ParseExpression): ) except ParseFatalException as pfe: pfe.__traceback__ = None - pfe.parserElement = e + pfe.parser_element = e raise except ParseException as err: if err.loc > maxExcLoc: @@ -4132,7 +4311,10 @@ class MatchFirst(ParseExpression): maxExcLoc = len(instring) if maxException is not None: - maxException.msg = self.errmsg + # infer from this check that all alternatives failed at the current position + # so emit this collective error message instead of any individual error message + if maxExcLoc == loc: + maxException.msg = self.errmsg raise maxException else: raise ParseException( @@ -4142,9 +4324,11 @@ class MatchFirst(ParseExpression): def __ior__(self, other): if isinstance(other, str_type): other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + return NotImplemented return self.append(other) # MatchFirst([self, other]) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "{" + " | ".join(str(e) for e in self.exprs) + "}" def _setResultsName(self, name, listAllMatches=False): @@ -4242,6 +4426,13 @@ class Each(ParseExpression): self.initExprGroups = True self.saveAsList = True + def __iand__(self, other): + if isinstance(other, str_type): + other = self._literalStringClass(other) + if not isinstance(other, ParserElement): + return NotImplemented + return self.append(other) # Each([self, other]) + def streamline(self) -> ParserElement: super().streamline() if self.exprs: @@ -4296,7 +4487,7 @@ class Each(ParseExpression): tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True) except ParseFatalException as pfe: pfe.__traceback__ = None - pfe.parserElement = e + pfe.parser_element = e fatals.append(pfe) failed.append(e) except ParseException: @@ -4315,7 +4506,7 @@ class Each(ParseExpression): if len(fatals) > 1: fatals.sort(key=lambda e: -e.loc) if fatals[0].loc == fatals[1].loc: - fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) + fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element)))) max_fatal = fatals[0] raise max_fatal @@ -4324,7 +4515,7 @@ class Each(ParseExpression): raise ParseException( instring, loc, - "Missing one or more required elements ({})".format(missing), + f"Missing one or more required elements ({missing})", ) # add any unmatched Opts, in case they have default values defined @@ -4337,7 +4528,7 @@ class Each(ParseExpression): return loc, total_results - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "{" + " & ".join(str(e) for e in self.exprs) + "}" @@ -4349,12 +4540,14 @@ class ParseElementEnhance(ParserElement): def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): super().__init__(savelist) if isinstance(expr, str_type): + expr_str = typing.cast(str, expr) if issubclass(self._literalStringClass, Token): - expr = self._literalStringClass(expr) + expr = self._literalStringClass(expr_str) # type: ignore[call-arg] elif issubclass(type(self), self._literalStringClass): - expr = Literal(expr) + expr = Literal(expr_str) else: - expr = self._literalStringClass(Literal(expr)) + expr = self._literalStringClass(Literal(expr_str)) # type: ignore[assignment, call-arg] + expr = typing.cast(ParserElement, expr) self.expr = expr if expr is not None: self.mayIndexError = expr.mayIndexError @@ -4367,12 +4560,17 @@ class ParseElementEnhance(ParserElement): self.callPreparse = expr.callPreparse self.ignoreExprs.extend(expr.ignoreExprs) - def recurse(self) -> Sequence[ParserElement]: + def recurse(self) -> List[ParserElement]: return [self.expr] if self.expr is not None else [] def parseImpl(self, instring, loc, doActions=True): if self.expr is not None: - return self.expr._parse(instring, loc, doActions, callPreParse=False) + try: + return self.expr._parse(instring, loc, doActions, callPreParse=False) + except ParseBaseException as pbe: + if not isinstance(self, Forward) or self.customName is not None: + pbe.msg = self.errmsg + raise else: raise ParseException(instring, loc, "No expression defined", self) @@ -4380,8 +4578,8 @@ class ParseElementEnhance(ParserElement): super().leave_whitespace(recursive) if recursive: - self.expr = self.expr.copy() if self.expr is not None: + self.expr = self.expr.copy() self.expr.leave_whitespace(recursive) return self @@ -4389,8 +4587,8 @@ class ParseElementEnhance(ParserElement): super().ignore_whitespace(recursive) if recursive: - self.expr = self.expr.copy() if self.expr is not None: + self.expr = self.expr.copy() self.expr.ignore_whitespace(recursive) return self @@ -4420,6 +4618,11 @@ class ParseElementEnhance(ParserElement): self.expr._checkRecursion(subRecCheckList) def validate(self, validateTrace=None) -> None: + warnings.warn( + "ParserElement.validate() is deprecated, and should not be used to check for left recursion", + DeprecationWarning, + stacklevel=2, + ) if validateTrace is None: validateTrace = [] tmp = validateTrace[:] + [self] @@ -4427,11 +4630,17 @@ class ParseElementEnhance(ParserElement): self.expr.validate(tmp) self._checkRecursion([]) - def _generateDefaultName(self): - return "{}:({})".format(self.__class__.__name__, str(self.expr)) + def _generateDefaultName(self) -> str: + return f"{self.__class__.__name__}:({str(self.expr)})" - ignoreWhitespace = ignore_whitespace - leaveWhitespace = leave_whitespace + # Compatibility synonyms + # fmt: off + @replaced_by_pep8(leave_whitespace) + def leaveWhitespace(self): ... + + @replaced_by_pep8(ignore_whitespace) + def ignoreWhitespace(self): ... + # fmt: on class IndentedBlock(ParseElementEnhance): @@ -4443,13 +4652,13 @@ class IndentedBlock(ParseElementEnhance): class _Indent(Empty): def __init__(self, ref_col: int): super().__init__() - self.errmsg = "expected indent at column {}".format(ref_col) + self.errmsg = f"expected indent at column {ref_col}" self.add_condition(lambda s, l, t: col(l, s) == ref_col) class _IndentGreater(Empty): def __init__(self, ref_col: int): super().__init__() - self.errmsg = "expected indent at column greater than {}".format(ref_col) + self.errmsg = f"expected indent at column greater than {ref_col}" self.add_condition(lambda s, l, t: col(l, s) > ref_col) def __init__( @@ -4469,7 +4678,7 @@ class IndentedBlock(ParseElementEnhance): # see if self.expr matches at the current location - if not it will raise an exception # and no further work is necessary - self.expr.try_parse(instring, anchor_loc, doActions) + self.expr.try_parse(instring, anchor_loc, do_actions=doActions) indent_col = col(anchor_loc, instring) peer_detect_expr = self._Indent(indent_col) @@ -4532,7 +4741,7 @@ class AtLineStart(ParseElementEnhance): B AAA and definitely not this one ''' - for t in (AtLineStart('AAA') + restOfLine).search_string(test): + for t in (AtLineStart('AAA') + rest_of_line).search_string(test): print(t) prints:: @@ -4598,9 +4807,9 @@ class PrecededBy(ParseElementEnhance): Parameters: - - expr - expression that must match prior to the current parse + - ``expr`` - expression that must match prior to the current parse location - - retreat - (default= ``None``) - (int) maximum number of characters + - ``retreat`` - (default= ``None``) - (int) maximum number of characters to lookbehind prior to the current parse location If the lookbehind expression is a string, :class:`Literal`, @@ -4627,6 +4836,7 @@ class PrecededBy(ParseElementEnhance): self.mayIndexError = False self.exact = False if isinstance(expr, str_type): + expr = typing.cast(str, expr) retreat = len(expr) self.exact = True elif isinstance(expr, (Literal, Keyword)): @@ -4746,18 +4956,18 @@ class NotAny(ParseElementEnhance): self.errmsg = "Found unwanted token, " + str(self.expr) def parseImpl(self, instring, loc, doActions=True): - if self.expr.can_parse_next(instring, loc): + if self.expr.can_parse_next(instring, loc, do_actions=doActions): raise ParseException(instring, loc, self.errmsg, self) return loc, [] - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "~{" + str(self.expr) + "}" class _MultipleMatch(ParseElementEnhance): def __init__( self, - expr: ParserElement, + expr: Union[str, ParserElement], stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, @@ -4781,7 +4991,7 @@ class _MultipleMatch(ParseElementEnhance): self_skip_ignorables = self._skipIgnorables check_ender = self.not_ender is not None if check_ender: - try_not_ender = self.not_ender.tryParse + try_not_ender = self.not_ender.try_parse # must be at least one (but first see if we are the stopOn sentinel; # if so, fail) @@ -4798,8 +5008,7 @@ class _MultipleMatch(ParseElementEnhance): else: preloc = loc loc, tmptokens = self_expr_parse(instring, preloc, doActions) - if tmptokens or tmptokens.haskeys(): - tokens += tmptokens + tokens += tmptokens except (ParseException, IndexError): pass @@ -4837,10 +5046,11 @@ class OneOrMore(_MultipleMatch): Repetition of one or more of the given expression. Parameters: - - expr - expression that must match one or more times - - stop_on - (default= ``None``) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) + + - ``expr`` - expression that must match one or more times + - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) Example:: @@ -4859,7 +5069,7 @@ class OneOrMore(_MultipleMatch): (attr_expr * (1,)).parse_string(text).pprint() """ - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "{" + str(self.expr) + "}..." @@ -4868,6 +5078,7 @@ class ZeroOrMore(_MultipleMatch): Optional repetition of zero or more of the given expression. Parameters: + - ``expr`` - expression that must match zero or more times - ``stop_on`` - expression for a terminating sentinel (only required if the sentinel would ordinarily match the repetition @@ -4878,7 +5089,7 @@ class ZeroOrMore(_MultipleMatch): def __init__( self, - expr: ParserElement, + expr: Union[str, ParserElement], stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, @@ -4892,10 +5103,75 @@ class ZeroOrMore(_MultipleMatch): except (ParseException, IndexError): return loc, ParseResults([], name=self.resultsName) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: return "[" + str(self.expr) + "]..." +class DelimitedList(ParseElementEnhance): + def __init__( + self, + expr: Union[str, ParserElement], + delim: Union[str, ParserElement] = ",", + combine: bool = False, + min: typing.Optional[int] = None, + max: typing.Optional[int] = None, + *, + allow_trailing_delim: bool = False, + ): + """Helper to define a delimited list of expressions - the delimiter + defaults to ','. By default, the list elements and delimiters can + have intervening whitespace, and comments, but this can be + overridden by passing ``combine=True`` in the constructor. If + ``combine`` is set to ``True``, the matching tokens are + returned as a single token string, with the delimiters included; + otherwise, the matching tokens are returned as a list of tokens, + with the delimiters suppressed. + + If ``allow_trailing_delim`` is set to True, then the list may end with + a delimiter. + + Example:: + + DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc'] + DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] + """ + if isinstance(expr, str_type): + expr = ParserElement._literalStringClass(expr) + expr = typing.cast(ParserElement, expr) + + if min is not None: + if min < 1: + raise ValueError("min must be greater than 0") + if max is not None: + if min is not None and max < min: + raise ValueError("max must be greater than, or equal to min") + + self.content = expr + self.raw_delim = str(delim) + self.delim = delim + self.combine = combine + if not combine: + self.delim = Suppress(delim) + self.min = min or 1 + self.max = max + self.allow_trailing_delim = allow_trailing_delim + + delim_list_expr = self.content + (self.delim + self.content) * ( + self.min - 1, + None if self.max is None else self.max - 1, + ) + if self.allow_trailing_delim: + delim_list_expr += Opt(self.delim) + + if self.combine: + delim_list_expr = Combine(delim_list_expr) + + super().__init__(delim_list_expr, savelist=True) + + def _generateDefaultName(self) -> str: + return "{0} [{1} {0}]...".format(self.content.streamline(), self.raw_delim) + + class _NullToken: def __bool__(self): return False @@ -4909,6 +5185,7 @@ class Opt(ParseElementEnhance): Optional matching of the given expression. Parameters: + - ``expr`` - expression that must match zero or more times - ``default`` (optional) - value to be returned if the optional expression is not found. @@ -4969,7 +5246,7 @@ class Opt(ParseElementEnhance): tokens = [] return loc, tokens - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: inner = str(self.expr) # strip off redundant inner {}'s while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": @@ -4986,6 +5263,7 @@ class SkipTo(ParseElementEnhance): expression is found. Parameters: + - ``expr`` - target expression marking the end of the data to be skipped - ``include`` - if ``True``, the target expression is also parsed (the skipped text and target expression are returned as a 2-element @@ -5045,10 +5323,10 @@ class SkipTo(ParseElementEnhance): self, other: Union[ParserElement, str], include: bool = False, - ignore: bool = None, + ignore: typing.Optional[Union[ParserElement, str]] = None, fail_on: typing.Optional[Union[ParserElement, str]] = None, *, - failOn: Union[ParserElement, str] = None, + failOn: typing.Optional[Union[ParserElement, str]] = None, ): super().__init__(other) failOn = failOn or fail_on @@ -5062,6 +5340,20 @@ class SkipTo(ParseElementEnhance): else: self.failOn = failOn self.errmsg = "No match found for " + str(self.expr) + self.ignorer = Empty().leave_whitespace() + self._update_ignorer() + + def _update_ignorer(self): + # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr + self.ignorer.ignoreExprs.clear() + for e in self.expr.ignoreExprs: + self.ignorer.ignore(e) + if self.ignoreExpr: + self.ignorer.ignore(self.ignoreExpr) + + def ignore(self, expr): + super().ignore(expr) + self._update_ignorer() def parseImpl(self, instring, loc, doActions=True): startloc = loc @@ -5070,9 +5362,7 @@ class SkipTo(ParseElementEnhance): self_failOn_canParseNext = ( self.failOn.canParseNext if self.failOn is not None else None ) - self_ignoreExpr_tryParse = ( - self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - ) + ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None tmploc = loc while tmploc <= instrlen: @@ -5081,13 +5371,18 @@ class SkipTo(ParseElementEnhance): if self_failOn_canParseNext(instring, tmploc): break - if self_ignoreExpr_tryParse is not None: + if ignorer_try_parse is not None: # advance past ignore expressions + prev_tmploc = tmploc while 1: try: - tmploc = self_ignoreExpr_tryParse(instring, tmploc) + tmploc = ignorer_try_parse(instring, tmploc) except ParseBaseException: break + # see if all ignorers matched, but didn't actually ignore anything + if tmploc == prev_tmploc: + break + prev_tmploc = tmploc try: self_expr_parse(instring, tmploc, doActions=False, callPreParse=False) @@ -5145,15 +5440,20 @@ class Forward(ParseElementEnhance): def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): self.caller_frame = traceback.extract_stack(limit=2)[0] - super().__init__(other, savelist=False) + super().__init__(other, savelist=False) # type: ignore[arg-type] self.lshift_line = None - def __lshift__(self, other): + def __lshift__(self, other) -> "Forward": if hasattr(self, "caller_frame"): del self.caller_frame if isinstance(other, str_type): other = self._literalStringClass(other) + + if not isinstance(other, ParserElement): + return NotImplemented + self.expr = other + self.streamlined = other.streamlined self.mayIndexError = self.expr.mayIndexError self.mayReturnEmpty = self.expr.mayReturnEmpty self.set_whitespace_chars( @@ -5162,13 +5462,16 @@ class Forward(ParseElementEnhance): self.skipWhitespace = self.expr.skipWhitespace self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) - self.lshift_line = traceback.extract_stack(limit=2)[-2] + self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment] return self - def __ilshift__(self, other): + def __ilshift__(self, other) -> "Forward": + if not isinstance(other, ParserElement): + return NotImplemented + return self << other - def __or__(self, other): + def __or__(self, other) -> "ParserElement": caller_line = traceback.extract_stack(limit=2)[-2] if ( __diag__.warn_on_match_first_with_lshift_operator @@ -5205,12 +5508,12 @@ class Forward(ParseElementEnhance): not in self.suppress_warnings_ ): # walk stack until parse_string, scan_string, search_string, or transform_string is found - parse_fns = [ + parse_fns = ( "parse_string", "scan_string", "search_string", "transform_string", - ] + ) tb = traceback.extract_stack(limit=200) for i, frm in enumerate(reversed(tb), start=1): if frm.name in parse_fns: @@ -5308,6 +5611,11 @@ class Forward(ParseElementEnhance): return self def validate(self, validateTrace=None) -> None: + warnings.warn( + "ParserElement.validate() is deprecated, and should not be used to check for left recursion", + DeprecationWarning, + stacklevel=2, + ) if validateTrace is None: validateTrace = [] @@ -5317,7 +5625,7 @@ class Forward(ParseElementEnhance): self.expr.validate(tmp) self._checkRecursion([]) - def _generateDefaultName(self): + def _generateDefaultName(self) -> str: # Avoid infinite recursion by setting a temporary _defaultName self._defaultName = ": ..." @@ -5356,8 +5664,14 @@ class Forward(ParseElementEnhance): return super()._setResultsName(name, list_all_matches) - ignoreWhitespace = ignore_whitespace - leaveWhitespace = leave_whitespace + # Compatibility synonyms + # fmt: off + @replaced_by_pep8(leave_whitespace) + def leaveWhitespace(self): ... + + @replaced_by_pep8(ignore_whitespace) + def ignoreWhitespace(self): ... + # fmt: on class TokenConverter(ParseElementEnhance): @@ -5439,11 +5753,11 @@ class Group(TokenConverter): ident = Word(alphas) num = Word(nums) term = ident | num - func = ident + Opt(delimited_list(term)) + func = ident + Opt(DelimitedList(term)) print(func.parse_string("fn a, b, 100")) # -> ['fn', 'a', 'b', '100'] - func = ident + Group(Opt(delimited_list(term))) + func = ident + Group(Opt(DelimitedList(term))) print(func.parse_string("fn a, b, 100")) # -> ['fn', ['a', 'b', '100']] """ @@ -5579,7 +5893,7 @@ class Suppress(TokenConverter): ['a', 'b', 'c', 'd'] ['START', 'relevant text ', 'END'] - (See also :class:`delimited_list`.) + (See also :class:`DelimitedList`.) """ def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): @@ -5638,15 +5952,13 @@ def trace_parse_action(f: ParseAction) -> ParseAction: s, l, t = paArgs[-3:] if len(paArgs) > 3: thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc - sys.stderr.write( - ">>entering {}(line: {!r}, {}, {!r})\n".format(thisFunc, line(l, s), l, t) - ) + sys.stderr.write(f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n") try: ret = f(*paArgs) except Exception as exc: - sys.stderr.write("< str: ) try: return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) - except Exception: + except Exception as e: return "" @@ -5769,7 +6081,11 @@ def autoname_elements() -> None: Utility to simplify mass-naming of parser elements, for generating railroad diagram with named subdiagrams. """ - for name, var in sys._getframe().f_back.f_locals.items(): + calling_frame = sys._getframe().f_back + if calling_frame is None: + return + calling_frame = typing.cast(types.FrameType, calling_frame) + for name, var in calling_frame.f_locals.items(): if isinstance(var, ParserElement) and not var.customName: var.set_name(name) @@ -5783,9 +6099,28 @@ sgl_quoted_string = Combine( ).set_name("string enclosed in single quotes") quoted_string = Combine( - Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' - | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" -).set_name("quotedString using single or double quotes") + (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( + "double quoted string" + ) + | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( + "single quoted string" + ) +).set_name("quoted string using single or double quotes") + +python_quoted_string = Combine( + (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name( + "multiline double quoted string" + ) + ^ ( + Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''" + ).set_name("multiline single quoted string") + ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( + "double quoted string" + ) + ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( + "single quoted string" + ) +).set_name("Python quoted string") unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") @@ -5800,9 +6135,7 @@ _builtin_exprs: List[ParserElement] = [ ] # backward compatibility names -tokenMap = token_map -conditionAsParseAction = condition_as_parse_action -nullDebugAction = null_debug_action +# fmt: off sglQuotedString = sgl_quoted_string dblQuotedString = dbl_quoted_string quotedString = quoted_string @@ -5811,4 +6144,16 @@ lineStart = line_start lineEnd = line_end stringStart = string_start stringEnd = string_end -traceParseAction = trace_parse_action + +@replaced_by_pep8(null_debug_action) +def nullDebugAction(): ... + +@replaced_by_pep8(trace_parse_action) +def traceParseAction(): ... + +@replaced_by_pep8(condition_as_parse_action) +def conditionAsParseAction(): ... + +@replaced_by_pep8(token_map) +def tokenMap(): ... +# fmt: on diff --git a/lib/pyparsing/diagram/__init__.py b/lib/pyparsing/diagram/__init__.py index 89864475..267f3447 100644 --- a/lib/pyparsing/diagram/__init__.py +++ b/lib/pyparsing/diagram/__init__.py @@ -1,3 +1,4 @@ +# mypy: ignore-errors import railroad import pyparsing import typing @@ -17,11 +18,13 @@ import inspect jinja2_template_source = """\ +{% if not embed %} +{% endif %} {% if not head %} -