Bump beautifulsoup4 from 4.12.2 to 4.12.3 (#2267)

* Bump beautifulsoup4 from 4.12.2 to 4.12.3

Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.12.2 to 4.12.3.

---
updated-dependencies:
- dependency-name: beautifulsoup4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update beautifulsoup4==4.12.3

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2024-03-24 15:26:22 -07:00 committed by GitHub
parent faef9a94c4
commit a0170a6f3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 263 additions and 173 deletions

View file

@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
""" """
__author__ = "Leonard Richardson (leonardr@segfault.org)" __author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.12.2" __version__ = "4.12.3"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson" __copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
# Use of this source code is governed by the MIT license. # Use of this source code is governed by the MIT license.
__license__ = "MIT" __license__ = "MIT"

View file

@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
XML_PREFIX_B = b'<?xml' XML_PREFIX_B = b'<?xml'
@classmethod @classmethod
def warn_if_markup_looks_like_xml(cls, markup): def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
"""Perform a check on some markup to see if it looks like XML """Perform a check on some markup to see if it looks like XML
that's not XHTML. If so, issue a warning. that's not XHTML. If so, issue a warning.
This is much less reliable than doing the check while parsing, This is much less reliable than doing the check while parsing,
but some of the tree builders can't do that. but some of the tree builders can't do that.
:param stacklevel: The stacklevel of the code calling this
function.
:return: True if the markup looks like non-XHTML XML, False :return: True if the markup looks like non-XHTML XML, False
otherwise. otherwise.
""" """
if isinstance(markup, bytes): if isinstance(markup, bytes):
prefix = cls.XML_PREFIX_B prefix = cls.XML_PREFIX_B
@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
and markup.startswith(prefix) and markup.startswith(prefix)
and not looks_like_html.search(markup[:500]) and not looks_like_html.search(markup[:500])
): ):
cls._warn() cls._warn(stacklevel=stacklevel+2)
return True return True
return False return False
@classmethod @classmethod
def _warn(cls): def _warn(cls, stacklevel=5):
"""Issue a warning about XML being parsed as HTML.""" """Issue a warning about XML being parsed as HTML."""
warnings.warn( warnings.warn(
XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
stacklevel=stacklevel
) )
def _initialize_xml_detector(self): def _initialize_xml_detector(self):

View file

@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# html5lib only parses HTML, so if it's given XML that's worth # html5lib only parses HTML, so if it's given XML that's worth
# noting. # noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup) DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
yield (markup, None, None, False) yield (markup, None, None, False)

View file

@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup parser.soup = self.soup
try: try:
parser.feed(markup) parser.feed(markup)
parser.close()
except AssertionError as e: except AssertionError as e:
# html.parser raises AssertionError in rare cases to # html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially # indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration. # when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e) raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = [] parser.already_closed_empty_element = []

View file

@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.processing_instruction_class = ProcessingInstruction self.processing_instruction_class = ProcessingInstruction
# We're in HTML mode, so if we're given XML, that's worth # We're in HTML mode, so if we're given XML, that's worth
# noting. # noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup) DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
markup, stacklevel=3
)
else: else:
self.processing_instruction_class = XMLProcessingInstruction self.processing_instruction_class = XMLProcessingInstruction

View file

@ -1356,7 +1356,7 @@ class Tag(PageElement):
This is the first step in the deepcopy process. This is the first step in the deepcopy process.
""" """
clone = type(self)( clone = type(self)(
None, self.builder, self.name, self.namespace, None, None, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml, self.prefix, self.attrs, is_xml=self._is_xml,
sourceline=self.sourceline, sourcepos=self.sourcepos, sourceline=self.sourceline, sourcepos=self.sourcepos,
can_be_empty_element=self.can_be_empty_element, can_be_empty_element=self.can_be_empty_element,
@ -1845,6 +1845,11 @@ class Tag(PageElement):
return space_before + s + space_after return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening): def _format_tag(self, eventual_encoding, formatter, opening):
if self.hidden:
# A hidden tag is invisible, although its contents
# are visible.
return ''
# A tag starts with the < character (see below). # A tag starts with the < character (see below).
# Then the / character, if this is a closing tag. # Then the / character, if this is a closing tag.

View file

@ -51,7 +51,7 @@ class Formatter(EntitySubstitution):
void_element_close_prefix='/', cdata_containing_tags=None, void_element_close_prefix='/', cdata_containing_tags=None,
empty_attributes_are_booleans=False, indent=1, empty_attributes_are_booleans=False, indent=1,
): ):
"""Constructor. r"""Constructor.
:param language: This should be Formatter.XML if you are formatting :param language: This should be Formatter.XML if you are formatting
XML markup and Formatter.HTML if you are formatting HTML markup. XML markup and Formatter.HTML if you are formatting HTML markup.
@ -76,7 +76,7 @@ class Formatter(EntitySubstitution):
negative, or "" will only insert newlines. Using a negative, or "" will only insert newlines. Using a
positive integer indent indents that many spaces per positive integer indent indents that many spaces per
level. If indent is a string (such as "\t"), that string level. If indent is a string (such as "\t"), that string
is used to indent each level. The default behavior to is used to indent each level. The default behavior is to
indent one space per level. indent one space per level.
""" """
self.language = language self.language = language

View file

@ -1105,7 +1105,7 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
doc = """<?xml version="1.0" encoding="utf-8"?> doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0" <Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1" xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2" xmlns:ns2="http://example.com/ns2">
<ns1:tag>foo</ns1:tag> <ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag> <ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag> <ns2:tag key="value">baz</ns2:tag>

View file

@ -0,0 +1 @@
˙ ><applet></applet><applet></applet><apple|><applet><applet><appl„><applet><applet></applet></applet></applet></applet><applet></applet><apple>t<applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet>et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><azplet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><app
let><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><plet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><apple
t><applet><applet><applet><applet><applet><applet><applet><applet><applet></applet></applet></applet></applet></appt></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet><<meta charset=utf-8>

View file

@ -0,0 +1 @@
- ˙˙ <math><select><mi><select><select>t

View file

@ -14,19 +14,61 @@ from bs4 import (
BeautifulSoup, BeautifulSoup,
ParserRejectedMarkup, ParserRejectedMarkup,
) )
try:
from soupsieve.util import SelectorSyntaxError
import lxml
import html5lib
fully_fuzzable = True
except ImportError:
fully_fuzzable = False
@pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.")
class TestFuzz(object): class TestFuzz(object):
# Test case markup files from fuzzers are given this extension so # Test case markup files from fuzzers are given this extension so
# they can be included in builds. # they can be included in builds.
TESTCASE_SUFFIX = ".testcase" TESTCASE_SUFFIX = ".testcase"
# Copied 20230512 from
# https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py
#
# Copying the code lets us precisely duplicate the behavior of
# oss-fuzz. The downside is that this code changes over time, so
# multiple copies of the code must be kept around to run against
# older tests. I'm not sure what to do about this, but I may
# retire old tests after a time.
def fuzz_test_with_css(self, filename):
data = self.__markup(filename)
parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
try:
idx = int(data[0]) % len(parsers)
except ValueError:
return
css_selector, data = data[1:10], data[10:]
try:
soup = BeautifulSoup(data[1:], features=parsers[idx])
except ParserRejectedMarkup:
return
except ValueError:
return
list(soup.find_all(True))
try:
soup.css.select(css_selector.decode('utf-8', 'replace'))
except SelectorSyntaxError:
return
soup.prettify()
# This class of error has been fixed by catching a less helpful # This class of error has been fixed by catching a less helpful
# exception from html.parser and raising ParserRejectedMarkup # exception from html.parser and raising ParserRejectedMarkup
# instead. # instead.
@pytest.mark.parametrize( @pytest.mark.parametrize(
"filename", [ "filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912", "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
"crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
] ]
) )
def test_rejected_markup(self, filename): def test_rejected_markup(self, filename):
@ -38,6 +80,9 @@ class TestFuzz(object):
# which overflow the Python call stack when the tree is converted # which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed # to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755]. # as part of [bug=1471755].
#
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.parametrize( @pytest.mark.parametrize(
"filename", [ "filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440", "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
@ -46,18 +91,44 @@ class TestFuzz(object):
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400", "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
] ]
) )
def test_deeply_nested_document(self, filename): def test_deeply_nested_document_without_css(self, filename):
# Parsing the document and encoding it back to a string is # Parsing the document and encoding it back to a string is
# sufficient to demonstrate that the overflow problem has # sufficient to demonstrate that the overflow problem has
# been fixed. # been fixed.
markup = self.__markup(filename) markup = self.__markup(filename)
BeautifulSoup(markup, 'html.parser').encode() BeautifulSoup(markup, 'html.parser').encode()
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624",
]
)
def test_deeply_nested_document(self, filename):
self.fuzz_test_with_css(filename)
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824",
]
)
def test_soupsieve_errors(self, filename):
self.fuzz_test_with_css(filename)
# This class of error represents problems with html5lib's parser, # This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use # not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify # https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues. # the html5lib developers of these issues.
@pytest.mark.skip("html5lib problems") #
# These test cases are in the older format that doesn't specify
# which parser to use or give a CSS selector.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize( @pytest.mark.parametrize(
"filename", [ "filename", [
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'""" # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
@ -79,10 +150,24 @@ class TestFuzz(object):
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08" "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
] ]
) )
def test_html5lib_parse_errors(self, filename): def test_html5lib_parse_errors_without_css(self, filename):
markup = self.__markup(filename) markup = self.__markup(filename)
print(BeautifulSoup(markup, 'html5lib').encode()) print(BeautifulSoup(markup, 'html5lib').encode())
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
"filename", [
# b'- \xff\xff <math>\x10<select><mi><select><select>t'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640",
]
)
def test_html5lib_parse_errors(self, filename):
self.fuzz_test_with_css(filename)
def __markup(self, filename): def __markup(self, filename):
if not filename.endswith(self.TESTCASE_SUFFIX): if not filename.endswith(self.TESTCASE_SUFFIX):
filename += self.TESTCASE_SUFFIX filename += self.TESTCASE_SUFFIX

View file

@ -219,3 +219,16 @@ class TestMultiValuedAttributes(SoupTest):
) )
assert soup.a['class'] == 'foo' assert soup.a['class'] == 'foo'
assert soup.a['id'] == ['bar'] assert soup.a['id'] == ['bar']
def test_hidden_tag_is_invisible(self):
# Setting .hidden on a tag makes it invisible in output, but
# leaves its contents visible.
#
# This is not a documented or supported feature of Beautiful
# Soup (e.g. NavigableString doesn't support .hidden even
# though it could), but some people use it and it's not
# hurting anything to verify that it keeps working.
#
soup = self.soup('<div id="1"><span id="2">a string</span></div>')
soup.span.hidden = True
assert '<div id="1">a string</div>' == str(soup.div)

View file

@ -78,13 +78,13 @@ def purge() -> None:
def closest( def closest(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Match closest ancestor.""" """Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag) return compile(select, namespaces, flags, **kwargs).closest(tag)
@ -92,7 +92,7 @@ def closest(
def match( def match(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
@ -106,13 +106,13 @@ def match(
def filter( # noqa: A001 def filter( # noqa: A001
select: str, select: str,
iterable: Iterable['bs4.Tag'], iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list[bs4.Tag]:
"""Filter list of nodes.""" """Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable) return compile(select, namespaces, flags, **kwargs).filter(iterable)
@ -120,13 +120,13 @@ def filter( # noqa: A001
def select_one( def select_one(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Select a single tag.""" """Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag) return compile(select, namespaces, flags, **kwargs).select_one(tag)
@ -134,14 +134,14 @@ def select_one(
def select( def select(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list[bs4.Tag]:
"""Select the specified tags.""" """Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit) return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@ -149,18 +149,17 @@ def select(
def iselect( def iselect(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> Iterator['bs4.Tag']: ) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit): yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
yield el
def escape(ident: str) -> str: def escape(ident: str) -> str:

View file

@ -93,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.") raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP: if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release)) raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases). # Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final": if ".dev-candidate" < release < "final":
@ -118,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev: elif dev:
raise ValueError("Version is not a development release.") raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev) return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool: def _is_pre(self) -> bool:
"""Is prerelease.""" """Is prerelease."""
@ -145,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed. # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
if self.micro == 0: if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor) ver = f"{self.major}.{self.minor}"
else: else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro) ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre(): if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre) ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post(): if self._is_post():
ver += ".post{}".format(self.post) ver += f".post{self.post}"
if self._is_dev(): if self._is_dev():
ver += ".dev{}".format(self.dev) ver += f".dev{self.dev}"
return ver return ver
@ -164,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver) m = RE_VER.match(ver)
if m is None: if m is None:
raise ValueError("'{}' is not a valid version".format(ver)) raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro # Handle major, minor, micro
major = int(m.group('major')) major = int(m.group('major'))
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev) return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 1, "final") __version_info__ = Version(2, 5, 0, "final")
__version__ = __version_info__._get_canonical() __version__ = __version_info__._get_canonical()

View file

@ -85,7 +85,7 @@ class _DocumentNav:
# Fail on unexpected types. # Fail on unexpected types.
if not cls.is_tag(tag): if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag))) raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod @staticmethod
def is_doc(obj: bs4.Tag) -> bool: def is_doc(obj: bs4.Tag) -> bool:
@ -165,8 +165,7 @@ class _DocumentNav:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]: def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse.""" """Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el): if not no_iframe or not self.is_iframe(el):
for content in el.contents: yield from el.contents
yield content
def get_children( def get_children(
self, self,
@ -283,7 +282,7 @@ class _DocumentNav:
like we do in the case of `is_html_tag`. like we do in the case of `is_html_tag`.
""" """
ns = getattr(el, 'namespace') if el else None ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML) return bool(ns and ns == NS_XHTML)
@staticmethod @staticmethod
@ -394,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool: def validate_week(year: int, week: int) -> bool:
"""Validate week.""" """Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1] max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1: if max_week == 1:
max_week = 53 max_week = 53
return 1 <= week <= max_week return 1 <= week <= max_week
@ -1272,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs # Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0: if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea: if is_textarea:
temp = [] value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
else: else:
value = cast(str, self.get_attribute_by_name(el, 'value', '')) value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value: if value:
@ -1571,17 +1566,14 @@ class SoupSieve(ct.Immutable):
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]: def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit): yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
yield el
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format( return (
self.pattern, f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
self.namespaces, f"custom={self.custom!r}, flags={self.flags!r})"
self.custom,
self.flags
) )
__str__ = __repr__ __str__ = __repr__

View file

@ -92,94 +92,79 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts # Sub-patterns parts
# Whitespace # Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE) WS = fr'(?:[ \t]|{NEWLINE})'
# Comments # Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included # Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS) WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes # CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS) CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE) CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier # CSS Identifier
IDENTIFIER = r''' IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--) (?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*) (?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''.format(esc=CSS_ESCAPES) '''
# `nth` content # `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC) NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier # Value: quoted string or identifier
VALUE = r''' VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard. # Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r''' ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns # Selector patterns
# IDs (`#id`) # IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`) # Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`) # Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER) PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.) # Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r''' PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`) # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`) # Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER) PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Closing pseudo group (`)`) # Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC) PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`) # Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS) PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported) # At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER) PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) # Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r''' PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{name} (?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*)) (?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH) '''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) # Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r''' PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{name} (?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{nth}|even|odd)){ws}*\) (?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH) '''
# Pseudo class language (`:lang("*-de", en)`) # Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`) # Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC) PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`) # Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC) PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`) # Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Regular expressions # Regular expressions
# CSS escape pattern # CSS escape pattern
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I) RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile( RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers # Pattern to break up `nth` specifiers
RE_NTH = re.compile( RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
re.I
)
# Pattern to iterate multiple values. # Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X) RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks # Whitespace checks
RE_WS = re.compile(WS) RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC)) RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile('{}*$'.format(WSC)) RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X) RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants # Constants
# List split token # List split token
@ -241,9 +226,9 @@ def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.Sele
for key, value in custom.items(): for key, value in custom.items():
name = util.lower(key) name = util.lower(key)
if RE_CUSTOM.match(name) is None: if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name)) raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors: if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name)) raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value custom_selectors[css_unescape(name)] = value
return custom_selectors return custom_selectors
@ -283,23 +268,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-' start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash: if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters # Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident)) string.append(f'\\{ident}')
else: else:
for index, c in enumerate(ident): for index, c in enumerate(ident):
codepoint = ord(c) codepoint = ord(c)
if codepoint == 0x00: if codepoint == 0x00:
string.append('\ufffd') string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39): elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif ( elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A) (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
): ):
string.append(c) string.append(c)
else: else:
string.append('\\{}'.format(c)) string.append(f'\\{c}')
return ''.join(string) return ''.join(string)
@ -419,11 +404,10 @@ class _Selector:
"""String representation.""" """String representation."""
return ( return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
).format( f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, f'no_match={self.no_match!r})'
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
) )
__repr__ = __str__ __repr__ = __str__
@ -563,7 +547,7 @@ class CSSParser:
selector = self.custom.get(pseudo) selector = self.custom.get(pseudo)
if selector is None: if selector is None:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)), f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern, self.pattern,
m.end(0) m.end(0)
) )
@ -663,13 +647,13 @@ class CSSParser:
has_selector = True has_selector = True
elif pseudo in PSEUDO_SUPPORTED: elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo), f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
else: else:
raise NotImplementedError( raise NotImplementedError(
"'{}' pseudo-class is not implemented at this time".format(pseudo) f"'{pseudo}' pseudo-class is not implemented at this time"
) )
return has_selector, is_html return has_selector, is_html
@ -793,7 +777,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace, # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail. # then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index), f'The multiple combinators at position {index}',
self.pattern, self.pattern,
index index
) )
@ -824,7 +808,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR: if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index), f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern, self.pattern,
index index
) )
@ -869,7 +853,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name'))) pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains": if pseudo == ":contains":
warnings.warn( warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning FutureWarning
) )
@ -982,13 +966,13 @@ class CSSParser:
# Handle parts # Handle parts
if key == "at_rule": if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0))) raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == 'pseudo_class_custom': elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector) has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class': elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element': elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains': elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector) has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'): elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1003,7 +987,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive: if not is_forgive:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Expected a selector at position {}".format(m.start(0)), f"Expected a selector at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1013,7 +997,7 @@ class CSSParser:
break break
else: else:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unmatched pseudo-class close at position {}".format(m.start(0)), f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1031,7 +1015,7 @@ class CSSParser:
elif key == 'tag': elif key == 'tag':
if has_selector: if has_selector:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)), f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1046,7 +1030,7 @@ class CSSParser:
# Handle selectors that are not closed # Handle selectors that are not closed
if is_open and not closed: if is_open and not closed:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index), f"Unclosed pseudo-class at position {index}",
self.pattern, self.pattern,
index index
) )
@ -1076,7 +1060,7 @@ class CSSParser:
# We will always need to finish a selector when `:has()` is used as it leads with combining. # We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well. # May apply to others as well.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index), f'Expected a selector at position {index}',
self.pattern, self.pattern,
index index
) )
@ -1108,7 +1092,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1) end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern)) print(f'## PARSING: {pattern!r}')
while index <= end: while index <= end:
m = None m = None
for v in self.css_tokens: for v in self.css_tokens:
@ -1116,7 +1100,7 @@ class CSSParser:
if m: if m:
name = v.get_name() name = v.get_name()
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0))) print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0) index = m.end(0)
yield name, m yield name, m
break break
@ -1126,15 +1110,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error; # throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character. # otherwise, report the invalid character.
if c == '[': if c == '[':
msg = "Malformed attribute selector at position {}".format(index) msg = f"Malformed attribute selector at position {index}"
elif c == '.': elif c == '.':
msg = "Malformed class selector at position {}".format(index) msg = f"Malformed class selector at position {index}"
elif c == '#': elif c == '#':
msg = "Malformed id selector at position {}".format(index) msg = f"Malformed id selector at position {index}"
elif c == ':': elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index) msg = f"Malformed pseudo-class selector at position {index}"
else: else:
msg = "Invalid character {!r} position {}".format(c, index) msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index) raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## END PARSING') print('## END PARSING')

View file

@ -45,11 +45,11 @@ class Immutable:
for k, v in kwargs.items(): for k, v in kwargs.items():
temp.append(type(v)) temp.append(type(v))
temp.append(v) temp.append(v)
super(Immutable, self).__setattr__(k, v) super().__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) super().__setattr__('_hash', hash(tuple(temp)))
@classmethod @classmethod
def __base__(cls) -> "type[Immutable]": def __base__(cls) -> type[Immutable]:
"""Get base class.""" """Get base class."""
return cls return cls
@ -59,7 +59,7 @@ class Immutable:
return ( return (
isinstance(other, self.__base__()) and isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __ne__(self, other: Any) -> bool: def __ne__(self, other: Any) -> bool:
@ -67,7 +67,7 @@ class Immutable:
return ( return (
not isinstance(other, self.__base__()) or not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __hash__(self) -> int: def __hash__(self) -> int:
@ -78,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None: def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability.""" """Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{}({})".format( r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) return f"{self.__class__.__name__}({r})"
)
__str__ = __repr__ __str__ = __repr__
@ -112,10 +111,10 @@ class ImmutableDict(Mapping[Any, Any]):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]): if not all(isinstance(v, Hashable) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]: def __iter__(self) -> Iterator[Any]:
"""Iterator.""" """Iterator."""
@ -140,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{!r}".format(self._d) return f"{self._d!r}"
__str__ = __repr__ __str__ = __repr__
@ -157,10 +156,10 @@ class Namespaces(ImmutableDict):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict): class CustomSelectors(ImmutableDict):
@ -175,10 +174,10 @@ class CustomSelectors(ImmutableDict):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable): class Selector(Immutable):
@ -367,7 +366,7 @@ class SelectorList(Immutable):
"""Initialize.""" """Initialize."""
super().__init__( super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(), selectors=tuple(selectors) if selectors is not None else (),
is_not=is_not, is_not=is_not,
is_html=is_html is_html=is_html
) )

View file

@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners). hasn't been tested extensively to make sure we aren't missing corners).
Example: Example:
-------
``` ```
>>> import soupsieve as sv >>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty() >>> sv.compile('this > that.class[name=value]').selectors.pretty()
@ -64,6 +64,7 @@ SelectorList(
is_not=False, is_not=False,
is_html=False) is_html=False)
``` ```
""" """
from __future__ import annotations from __future__ import annotations
import re import re
@ -123,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0) index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'): if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4 indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent)) output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0)) output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'): elif name in ('lend', 'dend', 'tend'):
indent -= 4 indent -= 4
output.append(m.group(0)) output.append(m.group(0))
elif name in ('sep',): elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent)) output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',): elif name in ('dsep',):
output.append('{} '.format(m.group(1))) output.append(f'{m.group(1)} ')
break break
return ''.join(output) return ''.join(output)

View file

@ -37,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None: if pattern is not None and index is not None:
# Format pattern to show line and column position # Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index) self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context) msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg) super().__init__(msg)
@ -105,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column # we will render the output with just `\n`. We will still log the column
# correctly though. # correctly though.
text.append('\n') text.append('\n')
text.append('{}{}'.format(indent, linetext)) text.append(f'{indent}{linetext}')
if offset is not None: if offset is not None:
text.append('\n') text.append('\n')
text.append(' ' * (col + offset) + '^') text.append(' ' * (col + offset) + '^')

View file

@ -3,7 +3,7 @@ arrow==1.3.0
backports.csv==1.0.7 backports.csv==1.0.7
backports.functools-lru-cache==2.0.0 backports.functools-lru-cache==2.0.0
backports.zoneinfo==0.2.1;python_version<"3.9" backports.zoneinfo==0.2.1;python_version<"3.9"
beautifulsoup4==4.12.2 beautifulsoup4==4.12.3
bleach==6.1.0 bleach==6.1.0
certifi==2024.2.2 certifi==2024.2.2
cheroot==10.0.0 cheroot==10.0.0