Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-07-07 21:51:14 -07:00)
Bump beautifulsoup4 from 4.11.1 to 4.11.2 (#1987)
* Bump beautifulsoup4 from 4.11.1 to 4.11.2

Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.1 to 4.11.2.

---
updated-dependencies:
- dependency-name: beautifulsoup4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update beautifulsoup4==4.11.2

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>
[skip ci]
This commit is contained in:
parent ded93ef2f5
commit 8e42757b2d
23 changed files with 449 additions and 537 deletions

@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
 provides methods and Pythonic idioms that make it easy to navigate,
 search, and modify the parse tree.

-Beautiful Soup works with Python 3.5 and up. It works better if lxml
+Beautiful Soup works with Python 3.6 and up. It works better if lxml
 and/or html5lib is installed.

 For more than you ever wanted to know about Beautiful Soup, see the
@@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """

 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.1"
-__copyright__ = "Copyright (c) 2004-2022 Leonard Richardson"
+__version__ = "4.11.2"
+__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"

@@ -211,7 +211,7 @@ class BeautifulSoup(Tag):
 warnings.warn(
 'The "%s" argument to the BeautifulSoup constructor '
 'has been renamed to "%s."' % (old_name, new_name),
-DeprecationWarning
+DeprecationWarning, stacklevel=3
 )
 return kwargs.pop(old_name)
 return None
@@ -405,7 +405,8 @@ class BeautifulSoup(Tag):
 'The input looks more like a URL than markup. You may want to use'
 ' an HTTP client like requests to get the document behind'
 ' the URL, and feed that document to Beautiful Soup.',
-MarkupResemblesLocatorWarning
+MarkupResemblesLocatorWarning,
+stacklevel=3
 )
 return True
 return False
@@ -436,7 +437,7 @@ class BeautifulSoup(Tag):
 'The input looks more like a filename than markup. You may'
 ' want to open this file and pass the filehandle into'
 ' Beautiful Soup.',
-MarkupResemblesLocatorWarning
+MarkupResemblesLocatorWarning, stacklevel=3
 )
 return True
 return False
@@ -789,7 +790,7 @@ class BeautifulStoneSoup(BeautifulSoup):
 warnings.warn(
 'The BeautifulStoneSoup class is deprecated. Instead of using '
 'it, pass features="xml" into the BeautifulSoup constructor.',
-DeprecationWarning
+DeprecationWarning, stacklevel=2
 )
 super(BeautifulStoneSoup, self).__init__(*args, **kwargs)

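The recurring change in this file is passing stacklevel to warnings.warn(), so deprecation and "looks like a URL/filename" warnings are attributed to the caller's line rather than to a line inside Beautiful Soup. A minimal, self-contained sketch of how stacklevel shifts the attributed frame (illustrative names, not bs4's actual code):

    import warnings

    def _warn_renamed(old_name, new_name):
        # stacklevel=3 skips this helper and the public wrapper below,
        # so the warning is reported at the user's own call site.
        warnings.warn(
            '"%s" has been renamed to "%s"' % (old_name, new_name),
            DeprecationWarning, stacklevel=3,
        )

    def public_constructor(**kwargs):
        if "parseOnlyThese" in kwargs:
            _warn_renamed("parseOnlyThese", "parse_only")

    public_constructor(parseOnlyThese=True)  # the warning points at this line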

@@ -122,7 +122,7 @@ class TreeBuilder(object):

 # A value for these tag/attribute combinations is a space- or
 # comma-separated list of CDATA, rather than a single CDATA.
-DEFAULT_CDATA_LIST_ATTRIBUTES = {}
+DEFAULT_CDATA_LIST_ATTRIBUTES = defaultdict(list)

 # Whitespace should be preserved inside these tags.
 DEFAULT_PRESERVE_WHITESPACE_TAGS = set()

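Switching DEFAULT_CDATA_LIST_ATTRIBUTES from a plain dict to defaultdict(list) means that looking up a tag with no configured multi-valued attributes yields an empty list instead of raising KeyError. A small standalone illustration of the difference (not bs4's code):

    from collections import defaultdict

    multi_valued = defaultdict(list)
    multi_valued['*'] = ['class', 'accesskey', 'dropzone']
    multi_valued['a'] = ['rel', 'rev']

    # An unconfigured tag simply has no multi-valued attributes.
    print(multi_valued['video'])       # []
    print('rel' in multi_valued['a'])  # True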

@@ -70,7 +70,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 # ATM because the html5lib TreeBuilder doesn't use
 # UnicodeDammit.
 if exclude_encodings:
-warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+warnings.warn(
+"You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.",
+stacklevel=3
+)

 # html5lib only parses HTML, so if it's given XML that's worth
 # noting.
@@ -81,7 +84,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 # These methods are defined by Beautiful Soup.
 def feed(self, markup):
 if self.soup.parse_only is not None:
-warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+warnings.warn(
+"You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
+stacklevel=4
+)
 parser = html5lib.HTMLParser(tree=self.create_treebuilder)
 self.underlying_builder.parser = parser
 extra_kwargs = dict()
@@ -249,9 +255,9 @@ class AttrList(object):
 # If this attribute is a multi-valued attribute for this element,
 # turn its value into a list.
 list_attr = self.element.cdata_list_attributes or {}
-if (name in list_attr.get('*')
+if (name in list_attr.get('*', [])
 or (self.element.name in list_attr
-and name in list_attr[self.element.name])):
+and name in list_attr.get(self.element.name, []))):
 # A node that is being cloned may have already undergone
 # this procedure.
 if not isinstance(value, list):

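The AttrList change is the companion fix: list_attr.get('*') returns None when no '*' key exists, and the subsequent "name in None" test raises TypeError, while list_attr[self.element.name] can raise KeyError. Supplying an empty-list default makes both lookups safe. The same defensive pattern in isolation (illustrative function, not bs4's API):

    def is_multi_valued(list_attr, tag_name, attr_name):
        # Both lookups fall back to an empty list, so a dict that omits '*'
        # or omits this tag means "no multi-valued attributes configured".
        return (
            attr_name in list_attr.get('*', [])
            or attr_name in list_attr.get(tag_name, [])
        )

    print(is_multi_valued({}, 'a', 'class'))                # False
    print(is_multi_valued({'*': ['class']}, 'a', 'class'))  # True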

@@ -10,30 +10,9 @@ __all__ = [

 from html.parser import HTMLParser

-try:
-from html.parser import HTMLParseError
-except ImportError as e:
-# HTMLParseError is removed in Python 3.5. Since it can never be
-# thrown in 3.5, we can just define our own class as a placeholder.
-class HTMLParseError(Exception):
-pass
-
 import sys
 import warnings

-# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
-# argument, which we'd like to set to False. Unfortunately,
-# http://bugs.python.org/issue13273 makes strict=True a better bet
-# before Python 3.2.3.
-#
-# At the end of this file, we monkeypatch HTMLParser so that
-# strict=True works well on Python 3.2.2.
-major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
-CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
-CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
-
-
 from bs4.element import (
 CData,
 Comment,
@@ -91,19 +70,6 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):

 self._initialize_xml_detector()

-def error(self, msg):
-"""In Python 3, HTMLParser subclasses must implement error(), although
-this requirement doesn't appear to be documented.
-
-In Python 2, HTMLParser implements error() by raising an exception,
-which we don't want to do.
-
-In any event, this method is called only on very strange
-markup and our best strategy is to pretend it didn't happen
-and keep going.
-"""
-warnings.warn(msg)
-
 def handle_startendtag(self, name, attrs):
 """Handle an incoming empty-element tag.

@@ -203,9 +169,10 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):

 :param name: Character number, possibly in hexadecimal.
 """
-# XXX workaround for a bug in HTMLParser. Remove this once
-# it's fixed in all supported versions.
-# http://bugs.python.org/issue13633
+# TODO: This was originally a workaround for a bug in
+# HTMLParser. (http://bugs.python.org/issue13633) The bug has
+# been fixed, but removing this code still makes some
+# Beautiful Soup tests fail. This needs investigation.
 if name.startswith('x'):
 real_name = int(name.lstrip('x'), 16)
 elif name.startswith('X'):
@@ -333,9 +300,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
 parser_args = parser_args or []
 parser_kwargs = parser_kwargs or {}
 parser_kwargs.update(extra_parser_kwargs)
-if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
-parser_kwargs['strict'] = False
-if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
 parser_kwargs['convert_charrefs'] = False
 self.parser_args = (parser_args, parser_kwargs)

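The deletions in this file strip the Python-2/early-3 compatibility scaffolding: CONSTRUCTOR_TAKES_STRICT and friends only mattered for HTMLParser on Python 3.2/3.3, and every interpreter this release supports accepts convert_charrefs, so the version probing collapses to an unconditional keyword argument. If similar capability probing were ever needed again, inspecting the constructor signature is one way to do it without hard-coding version numbers; a hedged sketch, not what bs4 actually does:

    import inspect
    from html.parser import HTMLParser

    def parser_kwargs():
        kwargs = {}
        # Only pass convert_charrefs if this HTMLParser accepts it (it does on
        # all currently supported Pythons; the probe is purely illustrative).
        params = inspect.signature(HTMLParser.__init__).parameters
        if 'convert_charrefs' in params:
            kwargs['convert_charrefs'] = False
        return kwargs

    parser = HTMLParser(**parser_kwargs())
    parser.feed('<p>Hello &amp; goodbye</p>')
    parser.close()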
@ -395,105 +359,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
|||
args, kwargs = self.parser_args
|
||||
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
||||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
parser.close()
|
||||
except HTMLParseError as e:
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
# string.
|
||||
#
|
||||
# XXX This code can be removed once most Python 3 users are on 3.2.3.
|
||||
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
|
||||
import re
|
||||
attrfind_tolerant = re.compile(
|
||||
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
|
||||
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:\s+ # whitespace before attribute name
|
||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
||||
(?:\s*=\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|\"[^\"]*\" # LIT-enclosed value
|
||||
|[^'\">\s]+ # bare value
|
||||
)
|
||||
)?
|
||||
)
|
||||
)*
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
|
||||
|
||||
from html.parser import tagfind, attrfind
|
||||
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
||||
while k < endpos:
|
||||
if self.strict:
|
||||
m = attrfind.match(rawdata, k)
|
||||
else:
|
||||
m = attrfind_tolerant.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
if self.strict:
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
|
||||
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
|
||||
|
||||
CONSTRUCTOR_TAKES_STRICT = True
|
||||
|
|
|

@@ -496,13 +496,16 @@ class PageElement(object):
 def extend(self, tags):
 """Appends the given PageElements to this one's contents.

-:param tags: A list of PageElements.
+:param tags: A list of PageElements. If a single Tag is
+provided instead, this PageElement's contents will be extended
+with that Tag's contents.
 """
 if isinstance(tags, Tag):
-# Calling self.append() on another tag's contents will change
-# the list we're iterating over. Make a list that won't
-# change.
-tags = list(tags.contents)
+tags = tags.contents
+if isinstance(tags, list):
+# Moving items around the tree may change their position in
+# the original list. Make a list that won't change.
+tags = list(tags)
 for tag in tags:
 self.append(tag)

@@ -586,8 +589,9 @@ class PageElement(object):
 :kwargs: A dictionary of filters on attribute values.
 :return: A ResultSet containing PageElements.
 """
+_stacklevel = kwargs.pop('_stacklevel', 2)
 return self._find_all(name, attrs, string, limit, self.next_elements,
-**kwargs)
+_stacklevel=_stacklevel+1, **kwargs)
 findAllNext = find_all_next # BS3

 def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
@@ -624,8 +628,11 @@ class PageElement(object):
 :return: A ResultSet of PageElements.
 :rtype: bs4.element.ResultSet
 """
-return self._find_all(name, attrs, string, limit,
-self.next_siblings, **kwargs)
+_stacklevel = kwargs.pop('_stacklevel', 2)
+return self._find_all(
+name, attrs, string, limit,
+self.next_siblings, _stacklevel=_stacklevel+1, **kwargs
+)
 findNextSiblings = find_next_siblings # BS3
 fetchNextSiblings = find_next_siblings # BS2

@@ -663,8 +670,11 @@ class PageElement(object):
 :return: A ResultSet of PageElements.
 :rtype: bs4.element.ResultSet
 """
-return self._find_all(name, attrs, string, limit, self.previous_elements,
-**kwargs)
+_stacklevel = kwargs.pop('_stacklevel', 2)
+return self._find_all(
+name, attrs, string, limit, self.previous_elements,
+_stacklevel=_stacklevel+1, **kwargs
+)
 findAllPrevious = find_all_previous # BS3
 fetchPrevious = find_all_previous # BS2

@@ -702,8 +712,11 @@ class PageElement(object):
 :return: A ResultSet of PageElements.
 :rtype: bs4.element.ResultSet
 """
-return self._find_all(name, attrs, string, limit,
-self.previous_siblings, **kwargs)
+_stacklevel = kwargs.pop('_stacklevel', 2)
+return self._find_all(
+name, attrs, string, limit,
+self.previous_siblings, _stacklevel=_stacklevel+1, **kwargs
+)
 findPreviousSiblings = find_previous_siblings # BS3
 fetchPreviousSiblings = find_previous_siblings # BS2

@@ -724,7 +737,7 @@ class PageElement(object):
 # NOTE: We can't use _find_one because findParents takes a different
 # set of arguments.
 r = None
-l = self.find_parents(name, attrs, 1, **kwargs)
+l = self.find_parents(name, attrs, 1, _stacklevel=3, **kwargs)
 if l:
 r = l[0]
 return r
@@ -744,8 +757,9 @@ class PageElement(object):
 :return: A PageElement.
 :rtype: bs4.element.Tag | bs4.element.NavigableString
 """
+_stacklevel = kwargs.pop('_stacklevel', 2)
 return self._find_all(name, attrs, None, limit, self.parents,
-**kwargs)
+_stacklevel=_stacklevel+1, **kwargs)
 findParents = find_parents # BS3
 fetchParents = find_parents # BS2

@@ -771,19 +785,20 @@ class PageElement(object):

 def _find_one(self, method, name, attrs, string, **kwargs):
 r = None
-l = method(name, attrs, string, 1, **kwargs)
+l = method(name, attrs, string, 1, _stacklevel=4, **kwargs)
 if l:
 r = l[0]
 return r

 def _find_all(self, name, attrs, string, limit, generator, **kwargs):
 "Iterates over a generator looking for things that match."
+_stacklevel = kwargs.pop('_stacklevel', 3)

 if string is None and 'text' in kwargs:
 string = kwargs.pop('text')
 warnings.warn(
 "The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
-DeprecationWarning
+DeprecationWarning, stacklevel=_stacklevel
 )

 if isinstance(name, SoupStrainer):
@@ -1306,7 +1321,8 @@ class Tag(PageElement):
 sourceline=self.sourceline, sourcepos=self.sourcepos,
 can_be_empty_element=self.can_be_empty_element,
 cdata_list_attributes=self.cdata_list_attributes,
-preserve_whitespace_tags=self.preserve_whitespace_tags
+preserve_whitespace_tags=self.preserve_whitespace_tags,
+interesting_string_types=self.interesting_string_types
 )
 for attr in ('can_be_empty_element', 'hidden'):
 setattr(clone, attr, getattr(self, attr))
@@ -1558,7 +1574,7 @@
 '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
 name=tag_name
 ),
-DeprecationWarning
+DeprecationWarning, stacklevel=2
 )
 return self.find(tag_name)
 # We special case contents to avoid recursion.
@@ -1862,7 +1878,8 @@
 :rtype: bs4.element.Tag | bs4.element.NavigableString
 """
 r = None
-l = self.find_all(name, attrs, recursive, string, 1, **kwargs)
+l = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3,
+**kwargs)
 if l:
 r = l[0]
 return r
@@ -1889,7 +1906,9 @@
 generator = self.descendants
 if not recursive:
 generator = self.children
-return self._find_all(name, attrs, string, limit, generator, **kwargs)
+_stacklevel = kwargs.pop('_stacklevel', 2)
+return self._find_all(name, attrs, string, limit, generator,
+_stacklevel=_stacklevel+1, **kwargs)
 findAll = find_all # BS3
 findChildren = find_all # BS2

@@ -1993,7 +2012,7 @@
 """
 warnings.warn(
 'has_key is deprecated. Use has_attr(key) instead.',
-DeprecationWarning
+DeprecationWarning, stacklevel=2
 )
 return self.has_attr(key)

@@ -2024,7 +2043,7 @@ class SoupStrainer(object):
 string = kwargs.pop('text')
 warnings.warn(
 "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
-DeprecationWarning
+DeprecationWarning, stacklevel=2
 )

 self.name = self._normalize_search_value(name)

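The thread running through these hunks is a private _stacklevel keyword: each public find_* wrapper pops it (defaulting to 2) and passes _stacklevel + 1 down to _find_all(), so the DeprecationWarning about the legacy text= argument is always attributed to the user's call site, however many wrapper layers sit in between. A condensed, self-contained sketch of the idea (illustrative functions, not bs4's API):

    import warnings

    def _find_all(text=None, string=None, _stacklevel=3, **kwargs):
        if string is None and text is not None:
            warnings.warn(
                "The 'text' argument is deprecated. Use 'string' instead.",
                DeprecationWarning, stacklevel=_stacklevel,
            )
            string = text
        return string

    def find_all_next(**kwargs):
        # One wrapper layer: add 1 so the warning also skips this frame.
        _stacklevel = kwargs.pop('_stacklevel', 2)
        return _find_all(_stacklevel=_stacklevel + 1, **kwargs)

    def find_next(**kwargs):
        # Two layers deep: start one higher so the count still lands on the caller.
        return find_all_next(_stacklevel=3, **kwargs)

    find_next(text='markup')  # the DeprecationWarning is reported on this line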

@@ -149,14 +149,14 @@ class HTMLFormatter(Formatter):
 """A generic Formatter for HTML."""
 REGISTRY = {}
 def __init__(self, *args, **kwargs):
-return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
+super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)


 class XMLFormatter(Formatter):
 """A generic Formatter for XML."""
 REGISTRY = {}
 def __init__(self, *args, **kwargs):
-return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
+super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)


 # Set up aliases for the default formatters.

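The formatter fix drops the return in front of super().__init__(...). An __init__ must return None; super().__init__() happens to return None, so the old code worked, but the return made the constructor read as if it produced a value. A minimal illustration:

    class Base:
        def __init__(self, kind):
            self.kind = kind

    class HTMLish(Base):
        def __init__(self):
            # Call the parent initializer for its side effects; don't return it.
            super().__init__('html')

    assert HTMLish().kind == 'html'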

@@ -29,6 +29,29 @@ from bs4.builder import (
 )
 default_builder = HTMLParserTreeBuilder

+# Some tests depend on specific third-party libraries. We use
+# @pytest.mark.skipIf on the following conditionals to skip them
+# if the libraries are not installed.
+try:
+from soupsieve import SelectorSyntaxError
+SOUP_SIEVE_PRESENT = True
+except ImportError:
+SOUP_SIEVE_PRESENT = False
+
+try:
+import html5lib
+HTML5LIB_PRESENT = True
+except ImportError:
+HTML5LIB_PRESENT = False
+
+try:
+import lxml.etree
+LXML_PRESENT = True
+LXML_VERSION = lxml.etree.LXML_VERSION
+except ImportError:
+LXML_PRESENT = False
+LXML_VERSION = (0,)
+
 BAD_DOCUMENT = """A bare string
 <!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
 <!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
@@ -258,10 +281,10 @@ class TreeBuilderSmokeTest(object):

 @pytest.mark.parametrize(
 "multi_valued_attributes",
-[None, dict(b=['class']), {'*': ['notclass']}]
+[None, {}, dict(b=['class']), {'*': ['notclass']}]
 )
 def test_attribute_not_multi_valued(self, multi_valued_attributes):
-markup = '<a class="a b c">'
+markup = '<html xmlns="http://www.w3.org/1999/xhtml"><a class="a b c"></html>'
 soup = self.soup(markup, multi_valued_attributes=multi_valued_attributes)
 assert soup.a['class'] == 'a b c'

@@ -820,26 +843,27 @@ Hello, world!
 soup = self.soup(text)
 assert soup.p.encode("utf-8") == expected

-def test_real_iso_latin_document(self):
+def test_real_iso_8859_document(self):
 # Smoke test of interrelated functionality, using an
 # easy-to-understand document.

-# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
-unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
+# Here it is in Unicode. Note that it claims to be in ISO-8859-1.
+unicode_html = '<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'

-# That's because we're going to encode it into ISO-Latin-1, and use
-# that to test.
+# That's because we're going to encode it into ISO-8859-1,
+# and use that to test.
 iso_latin_html = unicode_html.encode("iso-8859-1")

-# Parse the ISO-Latin-1 HTML.
+# Parse the ISO-8859-1 HTML.
 soup = self.soup(iso_latin_html)

 # Encode it to UTF-8.
 result = soup.encode("utf-8")

 # What do we expect the result to look like? Well, it would
 # look like unicode_html, except that the META tag would say
-# UTF-8 instead of ISO-Latin-1.
-expected = unicode_html.replace("ISO-Latin-1", "utf-8")
+# UTF-8 instead of ISO-8859-1.
+expected = unicode_html.replace("ISO-8859-1", "utf-8")

 # And, of course, it would be in UTF-8, not Unicode.
 expected = expected.encode("utf-8")
@@ -1177,15 +1201,3 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
 assert isinstance(soup.contents[0], Comment)
 assert soup.contents[0] == '?xml version="1.0" encoding="utf-8"?'
 assert "html" == soup.contents[0].next_element.name
-
-def skipIf(condition, reason):
-def nothing(test, *args, **kwargs):
-return None
-
-def decorator(test_item):
-if condition:
-return nothing
-else:
-return test_item
-
-return decorator

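The test package now probes its optional dependencies once, at import time, and exposes plain booleans (SOUP_SIEVE_PRESENT, HTML5LIB_PRESENT, LXML_PRESENT plus LXML_VERSION) that individual test modules can import and feed to pytest.mark.skipif, replacing the homegrown skipIf decorator deleted at the end of this hunk. The same pattern in a generic test suite (a hedged sketch, not the bs4 code):

    # shared test helpers: probe the optional dependency exactly once
    try:
        import lxml.etree
        LXML_PRESENT = True
        LXML_VERSION = lxml.etree.LXML_VERSION
    except ImportError:
        LXML_PRESENT = False
        LXML_VERSION = (0,)

    # a test module: gate a whole class on the flag
    import pytest

    @pytest.mark.skipif(not LXML_PRESENT, reason="lxml not installed")
    class TestWithLxml:
        def test_minimum_version(self):
            assert LXML_VERSION >= (2, 3, 5, 0)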

@@ -10,22 +10,23 @@ from bs4.builder import (
 TreeBuilderRegistry,
 )

-try:
-from bs4.builder import HTML5TreeBuilder
-HTML5LIB_PRESENT = True
-except ImportError:
-HTML5LIB_PRESENT = False
+from . import (
+HTML5LIB_PRESENT,
+LXML_PRESENT,
+)

-try:
+if HTML5LIB_PRESENT:
+from bs4.builder import HTML5TreeBuilder
+
+if LXML_PRESENT:
 from bs4.builder import (
 LXMLTreeBuilderForXML,
 LXMLTreeBuilder,
 )
-LXML_PRESENT = True
-except ImportError:
-LXML_PRESENT = False


+# TODO: Split out the lxml and html5lib tests into their own classes
+# and gate with pytest.mark.skipIf.
 class TestBuiltInRegistry(object):
 """Test the built-in registry with the default builders registered."""

@ -17,25 +17,23 @@ class TestUnicodeDammit(object):
|
|||
dammit = UnicodeDammit(markup)
|
||||
assert dammit.unicode_markup == markup
|
||||
|
||||
def test_smart_quotes_to_unicode(self):
|
||||
@pytest.mark.parametrize(
|
||||
"smart_quotes_to,expect_converted",
|
||||
[(None, "\u2018\u2019\u201c\u201d"),
|
||||
("xml", "‘’“”"),
|
||||
("html", "‘’“”"),
|
||||
("ascii", "''" + '""'),
|
||||
]
|
||||
)
|
||||
def test_smart_quotes_to(self, smart_quotes_to, expect_converted):
|
||||
"""Verify the functionality of the smart_quotes_to argument
|
||||
to the UnicodeDammit constructor."""
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup)
|
||||
assert dammit.unicode_markup == "<foo>\u2018\u2019\u201c\u201d</foo>"
|
||||
|
||||
def test_smart_quotes_to_xml_entities(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
|
||||
assert dammit.unicode_markup == "<foo>‘’“”</foo>"
|
||||
|
||||
def test_smart_quotes_to_html_entities(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="html")
|
||||
assert dammit.unicode_markup == "<foo>‘’“”</foo>"
|
||||
|
||||
def test_smart_quotes_to_ascii(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
|
||||
assert dammit.unicode_markup == """<foo>''""</foo>"""
|
||||
converted = UnicodeDammit(
|
||||
markup, known_definite_encodings=["windows-1252"],
|
||||
smart_quotes_to=smart_quotes_to
|
||||
).unicode_markup
|
||||
assert converted == "<foo>{}</foo>".format(expect_converted)
|
||||
|
||||
def test_detect_utf8(self):
|
||||
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
|
||||
|
@ -275,23 +273,24 @@ class TestEntitySubstitution(object):
|
|||
def setup_method(self):
|
||||
self.sub = EntitySubstitution
|
||||
|
||||
def test_simple_html_substitution(self):
|
||||
# Unicode characters corresponding to named HTML entites
|
||||
# are substituted, and no others.
|
||||
s = "foo\u2200\N{SNOWMAN}\u00f5bar"
|
||||
assert self.sub.substitute_html(s) == "foo∀\N{SNOWMAN}õbar"
|
||||
|
||||
def test_smart_quote_substitution(self):
|
||||
@pytest.mark.parametrize(
|
||||
"original,substituted",
|
||||
[
|
||||
# Basic case. Unicode characters corresponding to named
|
||||
# HTML entites are substituted; others are not.
|
||||
("foo\u2200\N{SNOWMAN}\u00f5bar",
|
||||
"foo∀\N{SNOWMAN}õbar"),
|
||||
|
||||
# MS smart quotes are a common source of frustration, so we
|
||||
# give them a special test.
|
||||
quotes = b"\x91\x92foo\x93\x94"
|
||||
dammit = UnicodeDammit(quotes)
|
||||
assert self.sub.substitute_html(dammit.markup) == "‘’foo“”"
|
||||
('‘’foo“”', "‘’foo“”"),
|
||||
]
|
||||
)
|
||||
def test_substitute_html(self, original, substituted):
|
||||
assert self.sub.substitute_html(original) == substituted
|
||||
|
||||
def test_html5_entity(self):
|
||||
# Some HTML5 entities correspond to single- or multi-character
|
||||
# Unicode sequences.
|
||||
|
||||
for entity, u in (
|
||||
# A few spot checks of our ability to recognize
|
||||
# special character sequences and convert them
|
||||
|
|
|

@@ -1,27 +1,26 @@
 """Tests to ensure that the html5lib tree builder generates good trees."""

+import pytest
 import warnings

-try:
-from bs4.builder import HTML5TreeBuilder
-HTML5LIB_PRESENT = True
-except ImportError as e:
-HTML5LIB_PRESENT = False
+from bs4 import BeautifulSoup
 from bs4.element import SoupStrainer
 from . import (
+HTML5LIB_PRESENT,
 HTML5TreeBuilderSmokeTest,
 SoupTest,
-skipIf,
 )

-@skipIf(
+@pytest.mark.skipif(
 not HTML5LIB_PRESENT,
-"html5lib seems not to be present, not testing its tree builder.")
+reason="html5lib seems not to be present, not testing its tree builder."
+)
 class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest):
 """See ``HTML5TreeBuilderSmokeTest``."""

 @property
 def default_builder(self):
 from bs4.builder import HTML5TreeBuilder
 return HTML5TreeBuilder

 def test_soupstrainer(self):
@@ -29,10 +28,12 @@ class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest):
 strainer = SoupStrainer("b")
 markup = "<p>A <b>bold</b> statement.</p>"
 with warnings.catch_warnings(record=True) as w:
-soup = self.soup(markup, parse_only=strainer)
+soup = BeautifulSoup(markup, "html5lib", parse_only=strainer)
 assert soup.decode() == self.document_for(markup)

-assert "the html5lib tree builder doesn't support parse_only" in str(w[0].message)
+[warning] = w
+assert warning.filename == __file__
+assert "the html5lib tree builder doesn't support parse_only" in str(warning.message)

 def test_correctly_nested_tables(self):
 """html5lib inserts <tbody> tags where other parsers don't."""

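The reworked test records warnings and then asserts warning.filename == __file__, which only holds when the warning's stacklevel walks back out of the library and into the test module; that is exactly what the stacklevel additions elsewhere in this commit are meant to guarantee. The recording mechanics in isolation (a generic sketch, not the bs4 test):

    import warnings

    def library_helper():
        # stacklevel=2 attributes the warning to whoever called this helper.
        warnings.warn("parse_only is not supported here", stacklevel=2)

    def test_warning_attributed_to_caller():
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            library_helper()
        [warning] = w
        assert warning.filename == __file__
        assert "parse_only is not supported" in str(warning.message)

In bs4's own suite the parsing helper lives in a different module, so the filename assertion genuinely depends on the stacklevel being correct.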

@@ -122,15 +122,3 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
 with_element = div.encode(formatter="html")
 expect = b"<div>%s</div>" % output_element
 assert with_element == expect
-
-class TestHTMLParserSubclass(SoupTest):
-def test_error(self):
-"""Verify that our HTMLParser subclass implements error() in a way
-that doesn't cause a crash.
-"""
-parser = BeautifulSoupHTMLParser()
-with warnings.catch_warnings(record=True) as warns:
-parser.error("don't crash")
-[warning] = warns
-assert "don't crash" == str(warning.message)
-

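The deleted test covered BeautifulSoupHTMLParser.error(), which this release removes along with the rest of the legacy error handling: the tolerant html.parser in the supported Python versions recovers from malformed markup on its own rather than calling an error() hook. Subclassing it directly needs nothing beyond the handle_* callbacks; an illustrative sketch, not bs4's builder:

    from html.parser import HTMLParser

    class TagCollector(HTMLParser):
        def __init__(self):
            super().__init__(convert_charrefs=True)
            self.tags = []

        def handle_starttag(self, tag, attrs):
            self.tags.append(tag)

    collector = TagCollector()
    collector.feed("<p><b>bold</b> and some <junk <not-quite-closed")
    collector.close()
    print(collector.tags)  # malformed input is tolerated, no error() override needed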
@ -1,16 +1,10 @@
|
|||
"""Tests to ensure that the lxml tree builder generates good trees."""
|
||||
|
||||
import pickle
|
||||
import pytest
|
||||
import re
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import lxml.etree
|
||||
LXML_PRESENT = True
|
||||
LXML_VERSION = lxml.etree.LXML_VERSION
|
||||
except ImportError as e:
|
||||
LXML_PRESENT = False
|
||||
LXML_VERSION = (0,)
|
||||
from . import LXML_PRESENT, LXML_VERSION
|
||||
|
||||
if LXML_PRESENT:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
|
@ -23,13 +17,14 @@ from bs4.element import Comment, Doctype, SoupStrainer
|
|||
from . import (
|
||||
HTMLTreeBuilderSmokeTest,
|
||||
XMLTreeBuilderSmokeTest,
|
||||
SOUP_SIEVE_PRESENT,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
@skipIf(
|
||||
@pytest.mark.skipif(
|
||||
not LXML_PRESENT,
|
||||
"lxml seems not to be present, not testing its tree builder.")
|
||||
reason="lxml seems not to be present, not testing its tree builder."
|
||||
)
|
||||
class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||
|
||||
|
@ -54,9 +49,10 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
|||
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
|
||||
# test if an old version of lxml is installed.
|
||||
|
||||
@skipIf(
|
||||
@pytest.mark.skipif(
|
||||
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
|
||||
"Skipping doctype test for old version of lxml to avoid segfault.")
|
||||
reason="Skipping doctype test for old version of lxml to avoid segfault."
|
||||
)
|
||||
def test_empty_doctype(self):
|
||||
soup = self.soup("<!DOCTYPE>")
|
||||
doctype = soup.contents[0]
|
||||
|
@ -68,7 +64,9 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
|||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = BeautifulStoneSoup("<b />")
|
||||
assert "<b/>" == str(soup.b)
|
||||
assert "BeautifulStoneSoup class is deprecated" in str(w[0].message)
|
||||
[warning] = w
|
||||
assert warning.filename == __file__
|
||||
assert "BeautifulStoneSoup class is deprecated" in str(warning.message)
|
||||
|
||||
def test_tracking_line_numbers(self):
|
||||
# The lxml TreeBuilder cannot keep track of line numbers from
|
||||
|
@ -85,9 +83,10 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
|||
assert "sourceline" == soup.p.sourceline.name
|
||||
assert "sourcepos" == soup.p.sourcepos.name
|
||||
|
||||
@skipIf(
|
||||
@pytest.mark.skipif(
|
||||
not LXML_PRESENT,
|
||||
"lxml seems not to be present, not testing its XML tree builder.")
|
||||
reason="lxml seems not to be present, not testing its XML tree builder."
|
||||
)
|
||||
class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
|
||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||
|
||||
|
@ -148,6 +147,9 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
|
|||
}
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed"
|
||||
)
|
||||
def test_namespace_interaction_with_select_and_find(self):
|
||||
# Demonstrate how namespaces interact with select* and
|
||||
# find* methods.
|
||||
|
|
|

@@ -3,15 +3,18 @@ import copy
 import pickle
 import pytest

-from soupsieve import SelectorSyntaxError
-
 from bs4 import BeautifulSoup
 from bs4.element import (
 Comment,
 SoupStrainer,
 )
-from . import SoupTest
+from . import (
+SoupTest,
+SOUP_SIEVE_PRESENT,
+)
+
+if SOUP_SIEVE_PRESENT:
+from soupsieve import SelectorSyntaxError

 class TestEncoding(SoupTest):
 """Test the ability to encode objects into strings."""
@@ -213,6 +216,7 @@ class TestFormatters(SoupTest):
 assert soup.contents[0].name == 'pre'


+@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
 class TestCSSSelectors(SoupTest):
 """Test basic CSS selector functionality.

@@ -694,6 +698,7 @@ class TestPersistence(SoupTest):
 assert tag.can_be_empty_element == copied.can_be_empty_element
 assert tag.cdata_list_attributes == copied.cdata_list_attributes
 assert tag.preserve_whitespace_tags == copied.preserve_whitespace_tags
+assert tag.interesting_string_types == copied.interesting_string_types

 def test_unicode_pickle(self):
 # A tree containing Unicode characters can be pickled.

@ -30,19 +30,11 @@ from bs4.element import (
|
|||
|
||||
from . import (
|
||||
default_builder,
|
||||
LXML_PRESENT,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
LXML_PRESENT = True
|
||||
except ImportError as e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
|
||||
def test_short_unicode_input(self):
|
||||
|
@ -139,7 +131,7 @@ class TestConstructor(SoupTest):
|
|||
assert " an id " == a['id']
|
||||
assert ["a", "class"] == a['class']
|
||||
|
||||
# TreeBuilder takes an argument called 'mutli_valued_attributes' which lets
|
||||
# TreeBuilder takes an argument called 'multi_valued_attributes' which lets
|
||||
# you customize or disable this. As always, you can customize the TreeBuilder
|
||||
# by passing in a keyword argument to the BeautifulSoup constructor.
|
||||
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
|
||||
|
@ -219,10 +211,17 @@ class TestConstructor(SoupTest):
|
|||
|
||||
|
||||
class TestWarnings(SoupTest):
|
||||
# Note that some of the tests in this class create BeautifulSoup
|
||||
# objects directly rather than using self.soup(). That's
|
||||
# because SoupTest.soup is defined in a different file,
|
||||
# which will throw off the assertion in _assert_warning
|
||||
# that the code that triggered the warning is in the same
|
||||
# file as the test.
|
||||
|
||||
def _assert_warning(self, warnings, cls):
|
||||
for w in warnings:
|
||||
if isinstance(w.message, cls):
|
||||
assert w.filename == __file__
|
||||
return w
|
||||
raise Exception("%s warning not found in %r" % (cls, warnings))
|
||||
|
||||
|
@ -243,13 +242,17 @@ class TestWarnings(SoupTest):
|
|||
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = BeautifulSoup("<a><b></b></a>", "html.parser")
|
||||
soup = self.soup("<a><b></b></a>")
|
||||
assert [] == w
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
|
||||
msg = str(w[0].message)
|
||||
soup = BeautifulSoup(
|
||||
"<a><b></b></a>", "html.parser",
|
||||
parseOnlyThese=SoupStrainer("b"),
|
||||
)
|
||||
warning = self._assert_warning(w, DeprecationWarning)
|
||||
msg = str(warning.message)
|
||||
assert "parseOnlyThese" in msg
|
||||
assert "parse_only" in msg
|
||||
assert b"<b></b>" == soup.encode()
|
||||
|
@ -257,8 +260,11 @@ class TestWarnings(SoupTest):
|
|||
def test_fromEncoding_renamed_to_from_encoding(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
utf8 = b"\xc3\xa9"
|
||||
soup = self.soup(utf8, fromEncoding="utf8")
|
||||
msg = str(w[0].message)
|
||||
soup = BeautifulSoup(
|
||||
utf8, "html.parser", fromEncoding="utf8"
|
||||
)
|
||||
warning = self._assert_warning(w, DeprecationWarning)
|
||||
msg = str(warning.message)
|
||||
assert "fromEncoding" in msg
|
||||
assert "from_encoding" in msg
|
||||
assert "utf8" == soup.original_encoding
|
||||
|
@ -276,7 +282,7 @@ class TestWarnings(SoupTest):
|
|||
# A warning is issued if the "markup" looks like the name of
|
||||
# an HTML or text file, or a full path to a file on disk.
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("markup" + extension)
|
||||
soup = BeautifulSoup("markup" + extension, "html.parser")
|
||||
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
|
||||
assert "looks more like a filename" in str(warning.message)
|
||||
|
||||
|
@ -295,7 +301,7 @@ class TestWarnings(SoupTest):
|
|||
def test_url_warning_with_bytes_url(self):
|
||||
url = b"http://www.crummybytes.com/"
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(url)
|
||||
soup = BeautifulSoup(url, "html.parser")
|
||||
warning = self._assert_warning(
|
||||
warning_list, MarkupResemblesLocatorWarning
|
||||
)
|
||||
|
@ -307,7 +313,7 @@ class TestWarnings(SoupTest):
|
|||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(url)
|
||||
soup = BeautifulSoup(url, "html.parser")
|
||||
warning = self._assert_warning(
|
||||
warning_list, MarkupResemblesLocatorWarning
|
||||
)
|
||||
|
@ -348,9 +354,12 @@ class TestNewTag(SoupTest):
|
|||
assert dict(bar="baz", name="a name") == new_tag.attrs
|
||||
assert None == new_tag.parent
|
||||
|
||||
def test_tag_inherits_self_closing_rules_from_builder(self):
|
||||
if LXML_PRESENT:
|
||||
xml_soup = BeautifulSoup("", "lxml-xml")
|
||||
@pytest.mark.skipif(
|
||||
not LXML_PRESENT,
|
||||
reason="lxml not installed, cannot parse XML document"
|
||||
)
|
||||
def test_xml_tag_inherits_self_closing_rules_from_builder(self):
|
||||
xml_soup = BeautifulSoup("", "xml")
|
||||
xml_br = xml_soup.new_tag("br")
|
||||
xml_p = xml_soup.new_tag("p")
|
||||
|
||||
|
@ -359,6 +368,7 @@ class TestNewTag(SoupTest):
|
|||
assert b"<br/>" == xml_br.encode()
|
||||
assert b"<p/>" == xml_p.encode()
|
||||
|
||||
def test_tag_inherits_self_closing_rules_from_builder(self):
|
||||
html_soup = BeautifulSoup("", "html.parser")
|
||||
html_br = html_soup.new_tag("br")
|
||||
html_p = html_soup.new_tag("p")
|
||||
|
@ -450,13 +460,3 @@ class TestEncodingConversion(SoupTest):
|
|||
# The internal data structures can be encoded as UTF-8.
|
||||
soup_from_unicode = self.soup(self.unicode_data)
|
||||
assert soup_from_unicode.encode('utf-8') == self.utf8_data
|
||||
|
||||
@skipIf(
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
assert self.soup(markup).div.encode("utf8") == markup.encode("utf8")
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ from bs4.element import (
|
|||
)
|
||||
from . import (
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
class TestFind(SoupTest):
|
||||
|
@ -910,12 +909,16 @@ class TestTreeModification(SoupTest):
|
|||
soup.a.extend(l)
|
||||
assert "<a><g></g><f></f><e></e><d></d><c></c><b></b></a>" == soup.decode()
|
||||
|
||||
def test_extend_with_another_tags_contents(self):
|
||||
@pytest.mark.parametrize(
|
||||
"get_tags", [lambda tag: tag, lambda tag: tag.contents]
|
||||
)
|
||||
def test_extend_with_another_tags_contents(self, get_tags):
|
||||
data = '<body><div id="d1"><a>1</a><a>2</a><a>3</a><a>4</a></div><div id="d2"></div></body>'
|
||||
soup = self.soup(data)
|
||||
d1 = soup.find('div', id='d1')
|
||||
d2 = soup.find('div', id='d2')
|
||||
d2.extend(d1)
|
||||
tags = get_tags(d1)
|
||||
d2.extend(tags)
|
||||
assert '<div id="d1"></div>' == d1.decode()
|
||||
assert '<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>' == d2.decode()
|
||||
|
||||
|
@ -1272,19 +1275,30 @@ class TestTreeModification(SoupTest):
|
|||
|
||||
class TestDeprecatedArguments(SoupTest):
|
||||
|
||||
def test_find_type_method_string(self):
|
||||
@pytest.mark.parametrize(
|
||||
"method_name", [
|
||||
"find", "find_all", "find_parent", "find_parents",
|
||||
"find_next", "find_all_next", "find_previous",
|
||||
"find_all_previous", "find_next_sibling", "find_next_siblings",
|
||||
"find_previous_sibling", "find_previous_siblings",
|
||||
]
|
||||
)
|
||||
def test_find_type_method_string(self, method_name):
|
||||
soup = self.soup("<a>some</a><b>markup</b>")
|
||||
method = getattr(soup.b, method_name)
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
[result] = soup.find_all(text='markup')
|
||||
assert result == 'markup'
|
||||
assert result.parent.name == 'b'
|
||||
msg = str(w[0].message)
|
||||
method(text='markup')
|
||||
[warning] = w
|
||||
assert warning.filename == __file__
|
||||
msg = str(warning.message)
|
||||
assert msg == "The 'text' argument to find()-type methods is deprecated. Use 'string' instead."
|
||||
|
||||
def test_soupstrainer_constructor_string(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
strainer = SoupStrainer(text="text")
|
||||
assert strainer.text == 'text'
|
||||
msg = str(w[0].message)
|
||||
[warning] = w
|
||||
msg = str(warning.message)
|
||||
assert warning.filename == __file__
|
||||
assert msg == "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead."
|
||||
|
||||
|
|
|
@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||
from . import css_parser as cp
|
||||
from . import css_match as cm
|
||||
from . import css_types as ct
|
||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||
import bs4 # type: ignore[import]
|
||||
from typing import Dict, Optional, Any, List, Iterator, Iterable
|
||||
from typing import Optional, Any, Iterator, Iterable
|
||||
|
||||
__all__ = (
|
||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
||||
|
@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve
|
|||
|
||||
def compile( # noqa: A001
|
||||
pattern: str,
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> cm.SoupSieve:
|
||||
"""Compile CSS pattern."""
|
||||
|
||||
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
|
||||
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
|
||||
|
||||
if isinstance(pattern, SoupSieve):
|
||||
if flags:
|
||||
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
|
||||
|
@ -64,7 +62,12 @@ def compile( # noqa: A001
|
|||
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
||||
return pattern
|
||||
|
||||
return cp._cached_css_compile(pattern, ns, cs, flags)
|
||||
return cp._cached_css_compile(
|
||||
pattern,
|
||||
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
|
||||
ct.CustomSelectors(custom) if custom is not None else custom,
|
||||
flags
|
||||
)
|
||||
|
||||
|
||||
def purge() -> None:
|
||||
|
@ -76,10 +79,10 @@ def purge() -> None:
|
|||
def closest(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> 'bs4.Tag':
|
||||
"""Match closest ancestor."""
|
||||
|
@ -90,10 +93,10 @@ def closest(
|
|||
def match(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> bool:
|
||||
"""Match node."""
|
||||
|
@ -104,12 +107,12 @@ def match(
|
|||
def filter( # noqa: A001
|
||||
select: str,
|
||||
iterable: Iterable['bs4.Tag'],
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> List['bs4.Tag']:
|
||||
) -> list['bs4.Tag']:
|
||||
"""Filter list of nodes."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||
|
@ -118,10 +121,10 @@ def filter( # noqa: A001
|
|||
def select_one(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> 'bs4.Tag':
|
||||
"""Select a single tag."""
|
||||
|
@ -132,13 +135,13 @@ def select_one(
|
|||
def select(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
limit: int = 0,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> List['bs4.Tag']:
|
||||
) -> list['bs4.Tag']:
|
||||
"""Select the specified tags."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
||||
|
@ -147,11 +150,11 @@ def select(
|
|||
def iselect(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[Dict[str, str]] = None,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
limit: int = 0,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[Dict[str, str]] = None,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> Iterator['bs4.Tag']:
|
||||
"""Iterate the specified tags."""
|
||||
|
|
|

@@ -1,4 +1,5 @@
 """Meta related things."""
+from __future__ import annotations
 from collections import namedtuple
 import re

@@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
 cls,
 major: int, minor: int, micro: int, release: str = "final",
 pre: int = 0, post: int = 0, dev: int = 0
-) -> "Version":
+) -> Version:
 """Validate version info."""

 # Ensure all parts are positive integers.
@@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version:
 return Version(major, minor, micro, release, pre, post, dev)


-__version_info__ = Version(2, 3, 2, "final", post=1)
+__version_info__ = Version(2, 4, 0, "final")
 __version__ = __version_info__._get_canonical()

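This hunk and the neighbouring ones add from __future__ import annotations, which turns every annotation into a string at definition time. That is what lets the signatures in these files use built-in generics (dict[str, str] instead of typing.Dict), PEP 604 unions (str | Sequence[str]) and unquoted self-references (-> Version) while still importing on older Python 3 versions that cannot evaluate that syntax at runtime. A small sketch of the idea (illustrative module, not the library's code):

    from __future__ import annotations

    from typing import Optional, Sequence

    def normalize(value: str | Sequence[str],
                  namespaces: Optional[dict[str, str]] = None) -> list[str]:
        # None of the annotations above are evaluated when the module loads,
        # so it imports cleanly even where the syntax has no runtime support.
        items = [value] if isinstance(value, str) else list(value)
        return [namespaces.get(i, i) if namespaces else i for i in items]

    class Version:
        def bump(self) -> Version:  # unquoted self-reference works here too
            return Version()

    print(normalize(['a', 'b'], {'a': 'x'}))  # ['x', 'b']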
@ -1,11 +1,12 @@
|
|||
"""CSS matcher."""
|
||||
from __future__ import annotations
|
||||
from datetime import datetime
|
||||
from . import util
|
||||
import re
|
||||
from . import css_types as ct
|
||||
import unicodedata
|
||||
import bs4 # type: ignore[import]
|
||||
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
|
||||
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
|
||||
|
||||
# Empty tag pattern (whitespace okay)
|
||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||
|
@ -64,12 +65,12 @@ class _FakeParent:
|
|||
fake parent so we can traverse the root element as a child.
|
||||
"""
|
||||
|
||||
def __init__(self, element: 'bs4.Tag') -> None:
|
||||
def __init__(self, element: bs4.Tag) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.contents = [element]
|
||||
|
||||
def __len__(self) -> 'bs4.PageElement':
|
||||
def __len__(self) -> bs4.PageElement:
|
||||
"""Length."""
|
||||
|
||||
return len(self.contents)
|
||||
|
@ -87,59 +88,59 @@ class _DocumentNav:
|
|||
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
|
||||
|
||||
@staticmethod
|
||||
def is_doc(obj: 'bs4.Tag') -> bool:
|
||||
def is_doc(obj: bs4.Tag) -> bool:
|
||||
"""Is `BeautifulSoup` object."""
|
||||
return isinstance(obj, bs4.BeautifulSoup)
|
||||
|
||||
@staticmethod
|
||||
def is_tag(obj: 'bs4.PageElement') -> bool:
|
||||
def is_tag(obj: bs4.PageElement) -> bool:
|
||||
"""Is tag."""
|
||||
return isinstance(obj, bs4.Tag)
|
||||
|
||||
@staticmethod
|
||||
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
||||
def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
|
||||
"""Is declaration."""
|
||||
return isinstance(obj, bs4.Declaration)
|
||||
|
||||
@staticmethod
|
||||
def is_cdata(obj: 'bs4.PageElement') -> bool:
|
||||
def is_cdata(obj: bs4.PageElement) -> bool:
|
||||
"""Is CDATA."""
|
||||
return isinstance(obj, bs4.CData)
|
||||
|
||||
@staticmethod
|
||||
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
||||
def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
|
||||
"""Is processing instruction."""
|
||||
return isinstance(obj, bs4.ProcessingInstruction)
|
||||
|
||||
@staticmethod
|
||||
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
|
||||
def is_navigable_string(obj: bs4.PageElement) -> bool:
|
||||
"""Is navigable string."""
|
||||
return isinstance(obj, bs4.NavigableString)
|
||||
|
||||
@staticmethod
|
||||
def is_special_string(obj: 'bs4.PageElement') -> bool:
|
||||
def is_special_string(obj: bs4.PageElement) -> bool:
|
||||
"""Is special string."""
|
||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
||||
|
||||
@classmethod
|
||||
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
|
||||
def is_content_string(cls, obj: bs4.PageElement) -> bool:
|
||||
"""Check if node is content string."""
|
||||
|
||||
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
|
||||
|
||||
@staticmethod
|
||||
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
|
||||
def create_fake_parent(el: bs4.Tag) -> _FakeParent:
|
||||
"""Create fake parent for a given element."""
|
||||
|
||||
return _FakeParent(el)
|
||||
|
||||
@staticmethod
|
||||
def is_xml_tree(el: 'bs4.Tag') -> bool:
|
||||
def is_xml_tree(el: bs4.Tag) -> bool:
|
||||
"""Check if element (or document) is from a XML tree."""
|
||||
|
||||
return bool(el._is_xml)
|
||||
|
||||
def is_iframe(self, el: 'bs4.Tag') -> bool:
|
||||
def is_iframe(self, el: bs4.Tag) -> bool:
|
||||
"""Check if element is an `iframe`."""
|
||||
|
||||
return bool(
|
||||
|
@ -147,7 +148,7 @@ class _DocumentNav:
|
|||
self.is_html_tag(el) # type: ignore[attr-defined]
|
||||
)
|
||||
|
||||
def is_root(self, el: 'bs4.Tag') -> bool:
|
||||
def is_root(self, el: bs4.Tag) -> bool:
|
||||
"""
|
||||
Return whether element is a root element.
|
||||
|
||||
|
@ -161,7 +162,7 @@ class _DocumentNav:
|
|||
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
|
||||
return root
|
||||
|
||||
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
|
||||
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
|
||||
"""Get contents or contents in reverse."""
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
for content in el.contents:
|
||||
|
@ -169,12 +170,12 @@ class _DocumentNav:
|
|||
|
||||
def get_children(
|
||||
self,
|
||||
el: 'bs4.Tag',
|
||||
el: bs4.Tag,
|
||||
start: Optional[int] = None,
|
||||
reverse: bool = False,
|
||||
tags: bool = True,
|
||||
no_iframe: bool = False
|
||||
) -> Iterator['bs4.PageElement']:
|
||||
) -> Iterator[bs4.PageElement]:
|
||||
"""Get children."""
|
||||
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
|
@ -195,10 +196,10 @@ class _DocumentNav:
|
|||
|
||||
def get_descendants(
|
||||
self,
|
||||
el: 'bs4.Tag',
|
||||
el: bs4.Tag,
|
||||
tags: bool = True,
|
||||
no_iframe: bool = False
|
||||
) -> Iterator['bs4.PageElement']:
|
||||
) -> Iterator[bs4.PageElement]:
|
||||
"""Get descendants."""
|
||||
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
|
@ -229,7 +230,7 @@ class _DocumentNav:
|
|||
if not tags or is_tag:
|
||||
yield child
|
||||
|
||||
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
|
||||
def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
|
||||
"""Get parent."""
|
||||
|
||||
parent = el.parent
|
||||
|
@ -238,25 +239,25 @@ class _DocumentNav:
|
|||
return parent
|
||||
|
||||
@staticmethod
|
||||
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
|
||||
def get_tag_name(el: bs4.Tag) -> Optional[str]:
|
||||
"""Get tag."""
|
||||
|
||||
return cast(Optional[str], el.name)
|
||||
|
||||
@staticmethod
|
||||
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
|
||||
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
|
||||
"""Get prefix."""
|
||||
|
||||
return cast(Optional[str], el.prefix)
|
||||
|
||||
@staticmethod
|
||||
def get_uri(el: 'bs4.Tag') -> Optional[str]:
|
||||
def get_uri(el: bs4.Tag) -> Optional[str]:
|
||||
"""Get namespace `URI`."""
|
||||
|
||||
return cast(Optional[str], el.namespace)
|
||||
|
||||
@classmethod
|
||||
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
|
||||
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
|
||||
"""Get next sibling tag."""
|
||||
|
||||
sibling = el.next_sibling
|
||||
|

@ -265,7 +266,7 @@ class _DocumentNav:
return sibling
@classmethod
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get previous sibling tag."""
sibling = el.previous_sibling

@ -274,7 +275,7 @@ class _DocumentNav:
return sibling
@staticmethod
def has_html_ns(el: 'bs4.Tag') -> bool:
def has_html_ns(el: bs4.Tag) -> bool:
"""
Check if element has an HTML namespace.

@ -286,13 +287,13 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
def normalize_value(cls, value: Any) -> str | Sequence[str]:
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.

@ -327,10 +328,10 @@ class _DocumentNav:
@classmethod
def get_attribute_by_name(
cls,
el: 'bs4.Tag',
el: bs4.Tag,
name: str,
default: Optional[Union[str, Sequence[str]]] = None
) -> Optional[Union[str, Sequence[str]]]:
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
"""Get attribute by name."""
value = default

@ -347,14 +348,14 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
yield k, cls.normalize_value(v)
@classmethod
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
"""Get classes."""
classes = cls.get_attribute_by_name(el, 'class', [])

@ -362,14 +363,14 @@ class _DocumentNav:
classes = RE_NOT_WS.findall(classes)
return cast(Sequence[str], classes)
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
"""Get text."""
return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
)
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]

@ -423,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
"""Parse the input value."""
parsed = None # type: Optional[Tuple[float, ...]]
parsed = None # type: Optional[tuple[float, ...]]
if value is None:
return value
if itype == "date":

@ -484,7 +485,7 @@ class CSSMatch(_DocumentNav):
def __init__(
self,
selectors: ct.SelectorList,
scope: 'bs4.Tag',
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
flags: int
) -> None:

@ -492,11 +493,11 @@ class CSSMatch(_DocumentNav):
self.assert_valid_input(scope)
self.tag = scope
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
self.cached_meta_lang = [] # type: list[tuple[str, str]]
self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
self.flags = flags
self.iframe_restrict = False

@ -527,7 +528,7 @@ class CSSMatch(_DocumentNav):
return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el: 'bs4.Tag') -> str:
def get_tag_ns(self, el: bs4.Tag) -> str:
"""Get tag namespace."""
if self.supports_namespaces():

@ -539,24 +540,24 @@ class CSSMatch(_DocumentNav):
namespace = NS_XHTML
return namespace
def is_html_tag(self, el: 'bs4.Tag') -> bool:
def is_html_tag(self, el: bs4.Tag) -> bool:
"""Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> Optional[str]:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):

@ -600,13 +601,18 @@ class CSSMatch(_DocumentNav):
ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-')
length = len(ranges)
slength = len(subtags)
rindex = 0
sindex = 0
r = ranges[rindex]
s = subtags[sindex]
# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True
# Primary tag needs to match
if r != '*' and r != s:
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False
rindex += 1
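The hunk above tightens the extended language-range matching used by `:lang()` (in the spirit of RFC 4647): an empty range now matches only an element whose language is itself unspecified, and the `*` wildcard no longer matches such an element. A minimal, self-contained sketch of just that primary-subtag rule (illustrative only, not soupsieve's internal function):

def primary_subtag_matches(lang_range: str, lang_tag: str) -> bool:
    # Compare only the first subtag, mirroring the updated checks in the hunk above.
    r = lang_range.lower().split('-')[0]
    subtags = lang_tag.lower().split('-')
    s = subtags[0]
    if not r:
        # An empty range only matches an unspecified (empty) language.
        return not lang_tag
    if r == '*':
        # The wildcard matches any specified language, but not an empty one.
        return not (len(subtags) == 1 and not s)
    return r == s

assert primary_subtag_matches('*', 'en-US')
assert not primary_subtag_matches('*', '')
assert primary_subtag_matches('', '')
assert not primary_subtag_matches('en', 'de')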

@ -645,10 +651,10 @@ class CSSMatch(_DocumentNav):
def match_attribute_name(
self,
el: 'bs4.Tag',
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[Union[str, Sequence[str]]]:
) -> Optional[str | Sequence[str]]:
"""Match attribute name and return value if it exists."""
value = None

@ -696,7 +702,7 @@ class CSSMatch(_DocumentNav):
break
return value
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element."""
match = True

@ -717,7 +723,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes."""
match = True

@ -736,7 +742,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)

@ -745,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
"""Match the tag."""
match = True

@ -757,7 +763,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match past relationship."""
found = False

@ -785,12 +791,12 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child."""
match = False
if recursive:
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
else:
children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict):

@ -799,7 +805,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match future relationship."""
found = False

@ -822,7 +828,7 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match relationship to other elements."""
found = False

@ -837,7 +843,7 @@ class CSSMatch(_DocumentNav):
return found
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
"""Match element's ID."""
found = True

@ -847,7 +853,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
"""Match element's classes."""
current_classes = self.get_classes(el)

@ -858,7 +864,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_root(self, el: 'bs4.Tag') -> bool:
def match_root(self, el: bs4.Tag) -> bool:
"""Match element as root."""
is_root = self.is_root(el)

@ -884,20 +890,20 @@ class CSSMatch(_DocumentNav):
sibling = self.get_next(sibling, tags=False)
return is_root
def match_scope(self, el: 'bs4.Tag') -> bool:
def match_scope(self, el: bs4.Tag) -> bool:
"""Match element as scope."""
return self.scope is el
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
"""Match tag type for `nth` matches."""
return(
return (
(self.get_tag(child) == self.get_tag(el)) and
(self.get_tag_ns(child) == self.get_tag_ns(el))
)
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
"""Match `nth` elements."""
matched = True

@ -998,7 +1004,7 @@ class CSSMatch(_DocumentNav):
break
return matched
def match_empty(self, el: 'bs4.Tag') -> bool:
def match_empty(self, el: bs4.Tag) -> bool:
"""Check if element is empty (if requested)."""
is_empty = True

@ -1011,7 +1017,7 @@ class CSSMatch(_DocumentNav):
break
return is_empty
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors."""
match = True

@ -1020,11 +1026,11 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text."""
match = True
content = None # type: Optional[Union[str, Sequence[str]]]
content = None # type: Optional[str | Sequence[str]]
for contain_list in contains:
if content is None:
if contain_list.own:

@ -1048,7 +1054,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_default(self, el: 'bs4.Tag') -> bool:
def match_default(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False

@ -1087,13 +1093,13 @@ class CSSMatch(_DocumentNav):
break
return match
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
def match_indeterminate(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)

@ -1148,7 +1154,7 @@ class CSSMatch(_DocumentNav):
return match
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages."""
match = False

@ -1183,7 +1189,7 @@ class CSSMatch(_DocumentNav):
break
# Use cached meta language.
if not found_lang and self.cached_meta_lang:
if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang:
if root is cache[0]:
found_lang = cache[1]

@ -1217,13 +1223,13 @@ class CSSMatch(_DocumentNav):
found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang:
if found_lang is not None:
break
if not found_lang:
if found_lang is None:
self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare.
if found_lang:
if found_lang is not None:
for patterns in langs:
match = False
for pattern in patterns:
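These hunks also change how the language discovered from the document (`found_lang`) is tested: it is now compared against `None` instead of being treated as a plain boolean, so an explicitly empty language counts as a determined value rather than triggering another lookup. A tiny illustration of why the distinction matters (hypothetical values, not soupsieve code):

found_lang = ''               # a language was determined, but it is empty
if not found_lang:            # old-style truthiness check: '' looks like "nothing found"
    print('keep searching')
if found_lang is None:        # new check: only a genuine miss keeps searching
    print('keep searching')
else:
    print('use the (possibly empty) language')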

@ -1234,7 +1240,7 @@ class CSSMatch(_DocumentNav):
return match
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
"""Check directionality."""
# If we have to match both left and right, we can't match either.

@ -1297,7 +1303,7 @@ class CSSMatch(_DocumentNav):
# Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
def match_range(self, el: bs4.Tag, condition: int) -> bool:
"""
Match range.

@ -1337,7 +1343,7 @@ class CSSMatch(_DocumentNav):
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el: 'bs4.Tag') -> bool:
def match_defined(self, el: bs4.Tag) -> bool:
"""
Match defined.

@ -1360,7 +1366,7 @@ class CSSMatch(_DocumentNav):
)
)
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
def match_placeholder_shown(self, el: bs4.Tag) -> bool:
"""
Match placeholder shown according to HTML spec.

@ -1375,7 +1381,7 @@ class CSSMatch(_DocumentNav):
return match
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors."""
match = False

@ -1459,7 +1465,7 @@ class CSSMatch(_DocumentNav):
return match
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
"""Match all tags under the targeted tag."""
lim = None if limit < 1 else limit

@ -1472,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional['bs4.Tag']:
def closest(self) -> Optional[bs4.Tag]:
"""Match closest ancestor."""
current = self.tag

@ -1484,12 +1490,12 @@ class CSSMatch(_DocumentNav):
current = self.get_parent(current)
return closest
def filter(self) -> List['bs4.Tag']: # noqa A001
def filter(self) -> list[bs4.Tag]: # noqa A001
"""Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el: 'bs4.Tag') -> bool:
def match(self, el: bs4.Tag) -> bool:
"""Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)

@ -1501,7 +1507,7 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
custom: Dict[str, str]
custom: dict[str, str]
flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

@ -1524,17 +1530,17 @@ class SoupSieve(ct.Immutable):
flags=flags
)
def match(self, tag: 'bs4.Tag') -> bool:
def match(self, tag: bs4.Tag) -> bool:
"""Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def closest(self, tag: bs4.Tag) -> bs4.Tag:
"""Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
"""
Filter.

@ -1551,18 +1557,18 @@ class SoupSieve(ct.Immutable):
else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def select_one(self, tag: bs4.Tag) -> bs4.Tag:
"""Select a single tag."""
tags = self.select(tag, limit=1)
return tags[0] if tags else None
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
"""Select the specified tags."""
return list(self.iselect(tag, limit))
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):

@ -1,4 +1,5 @@
"""CSS selector parser."""
from __future__ import annotations
import re
from functools import lru_cache
from . import util

@ -6,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast
from typing import Optional, Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
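The parser modules gain `from __future__ import annotations` and shed most `typing` aliases because annotations now use the built-in generics of PEP 585 (`list[str]`, `tuple[...]`) and the `X | Y` unions of PEP 604. With the future import those annotations are never evaluated at runtime, so the new syntax stays compatible with the older interpreters Beautiful Soup still supports. A small sketch of the pattern (hypothetical names, not taken from the diff):

from __future__ import annotations  # annotations are stored as strings, never evaluated

from typing import Optional

def split_tokens(pattern: Optional[str]) -> list[tuple[str, int]] | None:
    # PEP 585/604 syntax is safe in annotations even on Python 3.7/3.8 thanks to the future import.
    if pattern is None:
        return None
    return [(tok, i) for i, tok in enumerate(pattern.split())]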

@ -232,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]:
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}

@ -325,7 +326,7 @@ class SelectorPattern:
class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern."""
def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None:
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize."""
self.patterns = {}

@ -372,19 +373,19 @@ class _Selector:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.ids = kwargs.get('ids', []) # type: List[str]
self.classes = kwargs.get('classes', []) # type: List[str]
self.attributes = kwargs.get('attributes', []) # type: List[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: List[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: List[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: List[_Selector]
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.contains = kwargs.get('contains', []) # type: List[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: List[ct.SelectorLang]
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
self.no_match = kwargs.get('no_match', False) # type: bool
def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList:
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation."""
if relations:

@ -394,7 +395,7 @@ class _Selector:
else:
return ct.SelectorList()
def freeze(self) -> Union[ct.Selector, ct.SelectorNull]:
def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self."""
if self.no_match:

@ -461,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
flags: int = 0
) -> None:
"""Initialize."""

@ -583,9 +584,9 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
is_html: bool
) -> Tuple[bool, bool]:
) -> tuple[bool, bool]:
"""Parse pseudo class."""
complex_pseudo = False

@ -678,7 +679,7 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]]
iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
"""Parse `nth` pseudo."""

@ -743,7 +744,7 @@ class CSSParser:
sel: _Selector,
name: str,
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int
) -> bool:
"""Parse pseudo with opening bracket."""

@ -752,7 +753,7 @@ class CSSParser:
if name == ':not':
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
flags |= FLG_RELATIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
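With this change `:has()` keeps only the relative-selector flag and drops `FLG_FORGIVE`, while `:is()` and `:where()` stay forgiving; the hunks further down remove the matching special case for empty slots inside `:has()`. The expected effect, assuming the bundled soupsieve behaves like the upstream release, is that an empty `:has()` is rejected as a syntax error instead of silently matching nothing (hedged illustration, not verified against this exact build):

import soupsieve as sv

sv.compile('div:is()')          # forgiving: compiles and simply matches nothing
try:
    sv.compile('div:has()')     # no longer forgiving: expected to raise
except sv.SelectorSyntaxError as err:
    print('rejected:', err)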

@ -766,21 +767,16 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
selectors: list[_Selector],
rel_type: str,
index: int
) -> Tuple[bool, _Selector, str]:
) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR

@ -814,12 +810,12 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
relations: List[_Selector],
selectors: list[_Selector],
relations: list[_Selector],
is_pseudo: bool,
is_forgive: bool,
index: int
) -> Tuple[bool, _Selector]:
) -> tuple[bool, _Selector]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()

@ -924,7 +920,7 @@ class CSSParser:
def parse_selectors(
self,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int = 0,
flags: int = 0
) -> ct.SelectorList:

@ -935,7 +931,7 @@ class CSSParser:
selectors = []
has_selector = False
closed = False
relations = [] # type: List[_Selector]
relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR
# Setup various flags

@ -1069,18 +1065,8 @@ class CSSParser:
selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive:
if is_relative:
# Handle relative selectors pseudo-classes with empty slots like `:has()`
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
sel.rel_type = rel_type
sel.no_match = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
elif is_forgive and (not selectors or not relations):
# Handle normal pseudo-classes with empty slots like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)

@ -1112,7 +1098,7 @@ class CSSParser:
# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]:
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern

@ -1,7 +1,8 @@
"""CSS selector structure items."""
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
__all__ = (
'Selector',

@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable:
"""Immutable."""
__slots__: Tuple[str, ...] = ('_hash',)
__slots__: tuple[str, ...] = ('_hash',)
_hash: int

@ -48,7 +49,7 @@ class Immutable:
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls) -> "Type[Immutable]":
def __base__(cls) -> "type[Immutable]":
"""Get base class."""
return cls

@ -99,7 +100,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __init__(
self,
arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
) -> None:
"""Initialize."""

@ -107,7 +108,7 @@ class ImmutableDict(Mapping[Any, Any]):
self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):

@ -147,12 +148,12 @@ class ImmutableDict(Mapping[Any, Any]):
class Namespaces(ImmutableDict):
"""Namespaces."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):

@ -165,12 +166,12 @@ class Namespaces(ImmutableDict):
class CustomSelectors(ImmutableDict):
"""Custom selectors."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):

@ -188,30 +189,30 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional['SelectorTag']
ids: Tuple[str, ...]
classes: Tuple[str, ...]
attributes: Tuple['SelectorAttribute', ...]
nth: Tuple['SelectorNth', ...]
selectors: Tuple['SelectorList', ...]
relation: 'SelectorList'
tag: Optional[SelectorTag]
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
contains: Tuple['SelectorContains', ...]
lang: Tuple['SelectorLang', ...]
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional['SelectorTag'],
ids: Tuple[str, ...],
classes: Tuple[str, ...],
attributes: Tuple['SelectorAttribute', ...],
nth: Tuple['SelectorNth', ...],
selectors: Tuple['SelectorList', ...],
relation: 'SelectorList',
tag: Optional[SelectorTag],
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
contains: Tuple['SelectorContains', ...],
lang: Tuple['SelectorLang', ...],
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
):
"""Initialize."""

@ -286,7 +287,7 @@ class SelectorContains(Immutable):
__slots__ = ("text", "own", "_hash")
text: Tuple[str, ...]
text: tuple[str, ...]
own: bool
def __init__(self, text: Iterable[str], own: bool) -> None:

@ -305,9 +306,9 @@ class SelectorNth(Immutable):
b: int
of_type: bool
last: bool
selectors: 'SelectorList'
selectors: SelectorList
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
"""Initialize."""
super().__init__(

@ -325,7 +326,7 @@ class SelectorLang(Immutable):
__slots__ = ("languages", "_hash",)
languages: Tuple[str, ...]
languages: tuple[str, ...]
def __init__(self, languages: Iterable[str]):
"""Initialize."""

@ -353,13 +354,13 @@ class SelectorList(Immutable):
__slots__ = ("selectors", "is_not", "is_html", "_hash")
selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
selectors: tuple[Selector | SelectorNull, ...]
is_not: bool
is_html: bool
def __init__(
self,
selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
is_not: bool = False,
is_html: bool = False
) -> None:

@ -371,7 +372,7 @@ class SelectorList(Immutable):
is_html=is_html
)
def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
def __iter__(self) -> Iterator[Selector | SelectorNull]:
"""Iterator."""
return iter(self.selectors)

@ -381,7 +382,7 @@ class SelectorList(Immutable):
return len(self.selectors)
def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
def __getitem__(self, index: int) -> Selector | SelectorNull:
"""Get item."""
return self.selectors[index]

@ -65,6 +65,7 @@ SelectorList(
is_html=False)
```
"""
from __future__ import annotations
import re
from typing import Any

@ -1,8 +1,9 @@
"""Utility."""
from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional, Tuple, List
from typing import Callable, Any, Optional
DEBUG = 0x00001

@ -75,13 +76,13 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co
)
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
"""Get the pattern context."""
last = 0
current_line = 1
col = 1
text = [] # type: List[str]
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]

@ -4,7 +4,7 @@ arrow==1.2.3
backports.csv==1.0.7
backports.functools-lru-cache==1.6.4
backports.zoneinfo==0.2.1;python_version<"3.9"
beautifulsoup4==4.11.1
beautifulsoup4==4.11.2
bleach==6.0.0
certifi==2022.12.7
cheroot==9.0.0
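Finally, the pinned requirement moves from beautifulsoup4 4.11.1 to 4.11.2 to match the bundled library. A quick, generic way to confirm a local environment picked up the new version (not part of Tautulli's code):

import bs4

# Should report 4.11.2 once the updated requirement is installed.
print(bs4.__version__)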