Bump beautifulsoup4 from 4.12.2 to 4.12.3 (#2267)

* Bump beautifulsoup4 from 4.12.2 to 4.12.3

Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.12.2 to 4.12.3.

---
updated-dependencies:
- dependency-name: beautifulsoup4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update beautifulsoup4==4.12.3

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2024-03-24 15:26:22 -07:00 committed by GitHub
parent faef9a94c4
commit a0170a6f3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 263 additions and 173 deletions

View file

@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
__version__ = "4.12.3"
__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

View file

@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
XML_PREFIX_B = b'<?xml'
@classmethod
def warn_if_markup_looks_like_xml(cls, markup):
def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
"""Perform a check on some markup to see if it looks like XML
that's not XHTML. If so, issue a warning.
This is much less reliable than doing the check while parsing,
but some of the tree builders can't do that.
:param stacklevel: The stacklevel of the code calling this
function.
:return: True if the markup looks like non-XHTML XML, False
otherwise.
"""
if isinstance(markup, bytes):
prefix = cls.XML_PREFIX_B
@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
and markup.startswith(prefix)
and not looks_like_html.search(markup[:500])
):
cls._warn()
cls._warn(stacklevel=stacklevel+2)
return True
return False
@classmethod
def _warn(cls):
def _warn(cls, stacklevel=5):
"""Issue a warning about XML being parsed as HTML."""
warnings.warn(
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
stacklevel=stacklevel
)
def _initialize_xml_detector(self):

View file

@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# html5lib only parses HTML, so if it's given XML that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
yield (markup, None, None, False)

View file

@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
parser.close()
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []

View file

@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.processing_instruction_class = ProcessingInstruction
# We're in HTML mode, so if we're given XML, that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
else:
self.processing_instruction_class = XMLProcessingInstruction

View file

@ -1356,7 +1356,7 @@ class Tag(PageElement):
This is the first step in the deepcopy process.
"""
clone = type(self)(
None, self.builder, self.name, self.namespace,
None, None, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
sourceline=self.sourceline, sourcepos=self.sourcepos,
can_be_empty_element=self.can_be_empty_element,
@ -1845,6 +1845,11 @@ class Tag(PageElement):
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
if self.hidden:
# A hidden tag is invisible, although its contents
# are visible.
return ''
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.

View file

@ -51,7 +51,7 @@ class Formatter(EntitySubstitution):
void_element_close_prefix='/', cdata_containing_tags=None,
empty_attributes_are_booleans=False, indent=1,
):
"""Constructor.
r"""Constructor.
:param language: This should be Formatter.XML if you are formatting
XML markup and Formatter.HTML if you are formatting HTML markup.
@ -76,7 +76,7 @@ class Formatter(EntitySubstitution):
negative, or "" will only insert newlines. Using a
positive integer indent indents that many spaces per
level. If indent is a string (such as "\t"), that string
is used to indent each level. The default behavior to
is used to indent each level. The default behavior is to
indent one space per level.
"""
self.language = language

View file

@ -1105,7 +1105,7 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2"
xmlns:ns2="http://example.com/ns2">
<ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag>

View file

@ -0,0 +1 @@
˙ ><applet></applet><applet></applet><apple|><applet><applet><appl„><applet><applet></applet></applet></applet></applet><applet></applet><apple>t<applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet>et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><azplet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><app
let><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><plet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><apple
t><applet><applet><applet><applet><applet><applet><applet><applet><applet></applet></applet></applet></applet></appt></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet><<meta charset=utf-8>

View file

@ -0,0 +1 @@
- ˙˙ <math><select><mi><select><select>t

View file

@ -14,30 +14,75 @@ from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
try:
from soupsieve.util import SelectorSyntaxError
import lxml
import html5lib
fully_fuzzable = True
except ImportError:
fully_fuzzable = False
@pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.")
class TestFuzz(object):
# Test case markup files from fuzzers are given this extension so
# they can be included in builds.
TESTCASE_SUFFIX = ".testcase"
# Copied 20230512 from
# https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py
#
# Copying the code lets us precisely duplicate the behavior of
# oss-fuzz. The downside is that this code changes over time, so
# multiple copies of the code must be kept around to run against
# older tests. I'm not sure what to do about this, but I may
# retire old tests after a time.
def fuzz_test_with_css(self, filename):
data = self.__markup(filename)
parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
try:
idx = int(data[0]) % len(parsers)
except ValueError:
return
css_selector, data = data[1:10], data[10:]
try:
soup = BeautifulSoup(data[1:], features=parsers[idx])
except ParserRejectedMarkup:
return
except ValueError:
return
list(soup.find_all(True))
try:
soup.css.select(css_selector.decode('utf-8', 'replace'))
except SelectorSyntaxError:
return
soup.prettify()
# This class of error has been fixed by catching a less helpful
# exception from html.parser and raising ParserRejectedMarkup
# instead.
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
"crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
]
)
def test_rejected_markup(self, filename):
markup = self.__markup(filename)
with pytest.raises(ParserRejectedMarkup):
BeautifulSoup(markup, 'html.parser')
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
#
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
@ -46,18 +91,44 @@ class TestFuzz(object):
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
]
)
def test_deeply_nested_document(self, filename):
def test_deeply_nested_document_without_css(self, filename):
# Parsing the document and encoding it back to a string is
# sufficient to demonstrate that the overflow problem has
# been fixed.
markup = self.__markup(filename)
BeautifulSoup(markup, 'html.parser').encode()
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624",
]
)
def test_deeply_nested_document(self, filename):
self.fuzz_test_with_css(filename)
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824",
]
)
def test_soupsieve_errors(self, filename):
self.fuzz_test_with_css(filename)
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip("html5lib problems")
#
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
"filename", [
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
@ -68,7 +139,7 @@ class TestFuzz(object):
# b'-<math><sElect><mi><sElect><sElect>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
# b'ñ<table><svg><html>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
@ -79,10 +150,24 @@ class TestFuzz(object):
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
]
)
def test_html5lib_parse_errors(self, filename):
def test_html5lib_parse_errors_without_css(self, filename):
markup = self.__markup(filename)
print(BeautifulSoup(markup, 'html5lib').encode())
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
"filename", [
# b'- \xff\xff <math>\x10<select><mi><select><select>t'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640",
]
)
def test_html5lib_parse_errors(self, filename):
self.fuzz_test_with_css(filename)
def __markup(self, filename):
if not filename.endswith(self.TESTCASE_SUFFIX):
filename += self.TESTCASE_SUFFIX

View file

@ -219,3 +219,16 @@ class TestMultiValuedAttributes(SoupTest):
)
assert soup.a['class'] == 'foo'
assert soup.a['id'] == ['bar']
def test_hidden_tag_is_invisible(self):
# Setting .hidden on a tag makes it invisible in output, but
# leaves its contents visible.
#
# This is not a documented or supported feature of Beautiful
# Soup (e.g. NavigableString doesn't support .hidden even
# though it could), but some people use it and it's not
# hurting anything to verify that it keeps working.
#
soup = self.soup('<div id="1"><span id="2">a string</span></div>')
soup.span.hidden = True
assert '<div id="1">a string</div>' == str(soup.div)

View file

@ -78,13 +78,13 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag)
@ -92,7 +92,7 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
@ -106,13 +106,13 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
) -> list[bs4.Tag]:
"""Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable)
@ -120,13 +120,13 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag)
@ -134,14 +134,14 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
) -> list[bs4.Tag]:
"""Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@ -149,18 +149,17 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
yield el
yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:

View file

@ -93,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release))
raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final":
@ -118,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev:
raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool:
"""Is prerelease."""
@ -145,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor)
ver = f"{self.major}.{self.minor}"
else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre)
ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post():
ver += ".post{}".format(self.post)
ver += f".post{self.post}"
if self._is_dev():
ver += ".dev{}".format(self.dev)
ver += f".dev{self.dev}"
return ver
@ -164,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver)
if m is None:
raise ValueError("'{}' is not a valid version".format(ver))
raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro
major = int(m.group('major'))
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 1, "final")
__version_info__ = Version(2, 5, 0, "final")
__version__ = __version_info__._get_canonical()

View file

@ -85,7 +85,7 @@ class _DocumentNav:
# Fail on unexpected types.
if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod
def is_doc(obj: bs4.Tag) -> bool:
@ -165,8 +165,7 @@ class _DocumentNav:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el):
for content in el.contents:
yield content
yield from el.contents
def get_children(
self,
@ -283,7 +282,7 @@ class _DocumentNav:
like we do in the case of `is_html_tag`.
"""
ns = getattr(el, 'namespace') if el else None
ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML)
@staticmethod
@ -394,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool:
"""Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1:
max_week = 53
return 1 <= week <= max_week
@ -1272,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea:
temp = []
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
else:
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value:
@ -1571,17 +1566,14 @@ class SoupSieve(ct.Immutable):
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
yield el
yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
self.pattern,
self.namespaces,
self.custom,
self.flags
return (
f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
f"custom={self.custom!r}, flags={self.flags!r})"
)
__str__ = __repr__

View file

@ -92,94 +92,79 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE)
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled specially as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
# Selector patterns
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Regular expressions
# CSS escape pattern
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
RE_CSS_STR_ESC = re.compile(
r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(
r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
re.I
)
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants
# List split token
@ -241,9 +226,9 @@ def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.Sele
for key, value in custom.items():
name = util.lower(key)
if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name))
raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value
return custom_selectors
@ -283,23 +268,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident))
string.append(f'\\{ident}')
else:
for index, c in enumerate(ident):
codepoint = ord(c)
if codepoint == 0x00:
string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
):
string.append(c)
else:
string.append('\\{}'.format(c))
string.append(f'\\{c}')
return ''.join(string)
@ -419,11 +404,10 @@ class _Selector:
"""String representation."""
return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
).format(
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
f'no_match={self.no_match!r})'
)
__repr__ = __str__
@ -563,7 +547,7 @@ class CSSParser:
selector = self.custom.get(pseudo)
if selector is None:
raise SelectorSyntaxError(
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern,
m.end(0)
)
@ -663,13 +647,13 @@ class CSSParser:
has_selector = True
elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo),
f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern,
m.start(0)
)
else:
raise NotImplementedError(
"'{}' pseudo-class is not implemented at this time".format(pseudo)
f"'{pseudo}' pseudo-class is not implemented at this time"
)
return has_selector, is_html
@ -793,7 +777,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index),
f'The multiple combinators at position {index}',
self.pattern,
index
)
@ -824,7 +808,7 @@ class CSSParser:
if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError(
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern,
index
)
@ -869,7 +853,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn(
warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
@ -982,13 +966,13 @@ class CSSParser:
# Handle parts
if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1003,7 +987,7 @@ class CSSParser:
if not has_selector:
if not is_forgive:
raise SelectorSyntaxError(
"Expected a selector at position {}".format(m.start(0)),
f"Expected a selector at position {m.start(0)}",
self.pattern,
m.start(0)
)
@ -1013,7 +997,7 @@ class CSSParser:
break
else:
raise SelectorSyntaxError(
"Unmatched pseudo-class close at position {}".format(m.start(0)),
f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern,
m.start(0)
)
@ -1031,7 +1015,7 @@ class CSSParser:
elif key == 'tag':
if has_selector:
raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)),
f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern,
m.start(0)
)
@ -1046,7 +1030,7 @@ class CSSParser:
# Handle selectors that are not closed
if is_open and not closed:
raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index),
f"Unclosed pseudo-class at position {index}",
self.pattern,
index
)
@ -1076,7 +1060,7 @@ class CSSParser:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well.
raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index),
f'Expected a selector at position {index}',
self.pattern,
index
)
@ -1108,7 +1092,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern))
print(f'## PARSING: {pattern!r}')
while index <= end:
m = None
for v in self.css_tokens:
@ -1116,7 +1100,7 @@ class CSSParser:
if m:
name = v.get_name()
if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0)
yield name, m
break
@ -1126,15 +1110,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character.
if c == '[':
msg = "Malformed attribute selector at position {}".format(index)
msg = f"Malformed attribute selector at position {index}"
elif c == '.':
msg = "Malformed class selector at position {}".format(index)
msg = f"Malformed class selector at position {index}"
elif c == '#':
msg = "Malformed id selector at position {}".format(index)
msg = f"Malformed id selector at position {index}"
elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index)
msg = f"Malformed pseudo-class selector at position {index}"
else:
msg = "Invalid character {!r} position {}".format(c, index)
msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover
print('## END PARSING')

View file

@ -45,11 +45,11 @@ class Immutable:
for k, v in kwargs.items():
temp.append(type(v))
temp.append(v)
super(Immutable, self).__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
super().__setattr__(k, v)
super().__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls) -> "type[Immutable]":
def __base__(cls) -> type[Immutable]:
"""Get base class."""
return cls
@ -59,7 +59,7 @@ class Immutable:
return (
isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __ne__(self, other: Any) -> bool:
@ -67,7 +67,7 @@ class Immutable:
return (
not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __hash__(self) -> int:
@ -78,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{}({})".format(
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
)
r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
return f"{self.__class__.__name__}({r})"
__str__ = __repr__
@ -112,10 +111,10 @@ class ImmutableDict(Mapping[Any, Any]):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
if not all(isinstance(v, Hashable) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]:
"""Iterator."""
@ -140,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{!r}".format(self._d)
return f"{self._d!r}"
__str__ = __repr__
@ -157,10 +156,10 @@ class Namespaces(ImmutableDict):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
if not all(isinstance(v, str) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict):
@ -175,10 +174,10 @@ class CustomSelectors(ImmutableDict):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
if not all(isinstance(v, str) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable):
@ -367,7 +366,7 @@ class SelectorList(Immutable):
"""Initialize."""
super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(),
selectors=tuple(selectors) if selectors is not None else (),
is_not=is_not,
is_html=is_html
)

View file

@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners).
Example:
-------
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
@ -64,6 +64,7 @@ SelectorList(
is_not=False,
is_html=False)
```
"""
from __future__ import annotations
import re
@ -123,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent))
output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'):
indent -= 4
output.append(m.group(0))
elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent))
output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',):
output.append('{} '.format(m.group(1)))
output.append(f'{m.group(1)} ')
break
return ''.join(output)

View file

@ -37,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None:
# Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg)
@ -105,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column
# correctly though.
text.append('\n')
text.append('{}{}'.format(indent, linetext))
text.append(f'{indent}{linetext}')
if offset is not None:
text.append('\n')
text.append(' ' * (col + offset) + '^')

View file

@ -3,7 +3,7 @@ arrow==1.3.0
backports.csv==1.0.7
backports.functools-lru-cache==2.0.0
backports.zoneinfo==0.2.1;python_version<"3.9"
beautifulsoup4==4.12.2
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.2.2
cheroot==10.0.0