mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-08 06:00:51 -07:00
Bump beautifulsoup4 from 4.11.1 to 4.11.2 (#1987)
* Bump beautifulsoup4 from 4.11.1 to 4.11.2 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.1 to 4.11.2. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> * Update beautifulsoup4==4.11.2 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
This commit is contained in:
parent
ded93ef2f5
commit
8e42757b2d
23 changed files with 449 additions and 537 deletions
|
@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||||
provides methods and Pythonic idioms that make it easy to navigate,
|
provides methods and Pythonic idioms that make it easy to navigate,
|
||||||
search, and modify the parse tree.
|
search, and modify the parse tree.
|
||||||
|
|
||||||
Beautiful Soup works with Python 3.5 and up. It works better if lxml
|
Beautiful Soup works with Python 3.6 and up. It works better if lxml
|
||||||
and/or html5lib is installed.
|
and/or html5lib is installed.
|
||||||
|
|
||||||
For more than you ever wanted to know about Beautiful Soup, see the
|
For more than you ever wanted to know about Beautiful Soup, see the
|
||||||
|
@ -15,8 +15,8 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||||
__version__ = "4.11.1"
|
__version__ = "4.11.2"
|
||||||
__copyright__ = "Copyright (c) 2004-2022 Leonard Richardson"
|
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
|
||||||
# Use of this source code is governed by the MIT license.
|
# Use of this source code is governed by the MIT license.
|
||||||
__license__ = "MIT"
|
__license__ = "MIT"
|
||||||
|
|
||||||
|
@ -211,7 +211,7 @@ class BeautifulSoup(Tag):
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
'The "%s" argument to the BeautifulSoup constructor '
|
'The "%s" argument to the BeautifulSoup constructor '
|
||||||
'has been renamed to "%s."' % (old_name, new_name),
|
'has been renamed to "%s."' % (old_name, new_name),
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=3
|
||||||
)
|
)
|
||||||
return kwargs.pop(old_name)
|
return kwargs.pop(old_name)
|
||||||
return None
|
return None
|
||||||
|
@ -405,7 +405,8 @@ class BeautifulSoup(Tag):
|
||||||
'The input looks more like a URL than markup. You may want to use'
|
'The input looks more like a URL than markup. You may want to use'
|
||||||
' an HTTP client like requests to get the document behind'
|
' an HTTP client like requests to get the document behind'
|
||||||
' the URL, and feed that document to Beautiful Soup.',
|
' the URL, and feed that document to Beautiful Soup.',
|
||||||
MarkupResemblesLocatorWarning
|
MarkupResemblesLocatorWarning,
|
||||||
|
stacklevel=3
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
@ -436,7 +437,7 @@ class BeautifulSoup(Tag):
|
||||||
'The input looks more like a filename than markup. You may'
|
'The input looks more like a filename than markup. You may'
|
||||||
' want to open this file and pass the filehandle into'
|
' want to open this file and pass the filehandle into'
|
||||||
' Beautiful Soup.',
|
' Beautiful Soup.',
|
||||||
MarkupResemblesLocatorWarning
|
MarkupResemblesLocatorWarning, stacklevel=3
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
@ -789,7 +790,7 @@ class BeautifulStoneSoup(BeautifulSoup):
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
'The BeautifulStoneSoup class is deprecated. Instead of using '
|
'The BeautifulStoneSoup class is deprecated. Instead of using '
|
||||||
'it, pass features="xml" into the BeautifulSoup constructor.',
|
'it, pass features="xml" into the BeautifulSoup constructor.',
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=2
|
||||||
)
|
)
|
||||||
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
|
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
|
@ -122,7 +122,7 @@ class TreeBuilder(object):
|
||||||
|
|
||||||
# A value for these tag/attribute combinations is a space- or
|
# A value for these tag/attribute combinations is a space- or
|
||||||
# comma-separated list of CDATA, rather than a single CDATA.
|
# comma-separated list of CDATA, rather than a single CDATA.
|
||||||
DEFAULT_CDATA_LIST_ATTRIBUTES = {}
|
DEFAULT_CDATA_LIST_ATTRIBUTES = defaultdict(list)
|
||||||
|
|
||||||
# Whitespace should be preserved inside these tags.
|
# Whitespace should be preserved inside these tags.
|
||||||
DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
|
DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
|
||||||
|
|
|
@ -70,7 +70,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||||
# ATM because the html5lib TreeBuilder doesn't use
|
# ATM because the html5lib TreeBuilder doesn't use
|
||||||
# UnicodeDammit.
|
# UnicodeDammit.
|
||||||
if exclude_encodings:
|
if exclude_encodings:
|
||||||
warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
|
warnings.warn(
|
||||||
|
"You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.",
|
||||||
|
stacklevel=3
|
||||||
|
)
|
||||||
|
|
||||||
# html5lib only parses HTML, so if it's given XML that's worth
|
# html5lib only parses HTML, so if it's given XML that's worth
|
||||||
# noting.
|
# noting.
|
||||||
|
@ -81,7 +84,10 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||||
# These methods are defined by Beautiful Soup.
|
# These methods are defined by Beautiful Soup.
|
||||||
def feed(self, markup):
|
def feed(self, markup):
|
||||||
if self.soup.parse_only is not None:
|
if self.soup.parse_only is not None:
|
||||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
warnings.warn(
|
||||||
|
"You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
|
||||||
|
stacklevel=4
|
||||||
|
)
|
||||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||||
self.underlying_builder.parser = parser
|
self.underlying_builder.parser = parser
|
||||||
extra_kwargs = dict()
|
extra_kwargs = dict()
|
||||||
|
@ -249,9 +255,9 @@ class AttrList(object):
|
||||||
# If this attribute is a multi-valued attribute for this element,
|
# If this attribute is a multi-valued attribute for this element,
|
||||||
# turn its value into a list.
|
# turn its value into a list.
|
||||||
list_attr = self.element.cdata_list_attributes or {}
|
list_attr = self.element.cdata_list_attributes or {}
|
||||||
if (name in list_attr.get('*')
|
if (name in list_attr.get('*', [])
|
||||||
or (self.element.name in list_attr
|
or (self.element.name in list_attr
|
||||||
and name in list_attr[self.element.name])):
|
and name in list_attr.get(self.element.name, []))):
|
||||||
# A node that is being cloned may have already undergone
|
# A node that is being cloned may have already undergone
|
||||||
# this procedure.
|
# this procedure.
|
||||||
if not isinstance(value, list):
|
if not isinstance(value, list):
|
||||||
|
|
|
@ -10,30 +10,9 @@ __all__ = [
|
||||||
|
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
try:
|
|
||||||
from html.parser import HTMLParseError
|
|
||||||
except ImportError as e:
|
|
||||||
# HTMLParseError is removed in Python 3.5. Since it can never be
|
|
||||||
# thrown in 3.5, we can just define our own class as a placeholder.
|
|
||||||
class HTMLParseError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
|
|
||||||
# argument, which we'd like to set to False. Unfortunately,
|
|
||||||
# http://bugs.python.org/issue13273 makes strict=True a better bet
|
|
||||||
# before Python 3.2.3.
|
|
||||||
#
|
|
||||||
# At the end of this file, we monkeypatch HTMLParser so that
|
|
||||||
# strict=True works well on Python 3.2.2.
|
|
||||||
major, minor, release = sys.version_info[:3]
|
|
||||||
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
|
|
||||||
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
|
|
||||||
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
|
||||||
|
|
||||||
|
|
||||||
from bs4.element import (
|
from bs4.element import (
|
||||||
CData,
|
CData,
|
||||||
Comment,
|
Comment,
|
||||||
|
@ -91,19 +70,6 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||||
|
|
||||||
self._initialize_xml_detector()
|
self._initialize_xml_detector()
|
||||||
|
|
||||||
def error(self, msg):
|
|
||||||
"""In Python 3, HTMLParser subclasses must implement error(), although
|
|
||||||
this requirement doesn't appear to be documented.
|
|
||||||
|
|
||||||
In Python 2, HTMLParser implements error() by raising an exception,
|
|
||||||
which we don't want to do.
|
|
||||||
|
|
||||||
In any event, this method is called only on very strange
|
|
||||||
markup and our best strategy is to pretend it didn't happen
|
|
||||||
and keep going.
|
|
||||||
"""
|
|
||||||
warnings.warn(msg)
|
|
||||||
|
|
||||||
def handle_startendtag(self, name, attrs):
|
def handle_startendtag(self, name, attrs):
|
||||||
"""Handle an incoming empty-element tag.
|
"""Handle an incoming empty-element tag.
|
||||||
|
|
||||||
|
@ -203,9 +169,10 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
|
||||||
|
|
||||||
:param name: Character number, possibly in hexadecimal.
|
:param name: Character number, possibly in hexadecimal.
|
||||||
"""
|
"""
|
||||||
# XXX workaround for a bug in HTMLParser. Remove this once
|
# TODO: This was originally a workaround for a bug in
|
||||||
# it's fixed in all supported versions.
|
# HTMLParser. (http://bugs.python.org/issue13633) The bug has
|
||||||
# http://bugs.python.org/issue13633
|
# been fixed, but removing this code still makes some
|
||||||
|
# Beautiful Soup tests fail. This needs investigation.
|
||||||
if name.startswith('x'):
|
if name.startswith('x'):
|
||||||
real_name = int(name.lstrip('x'), 16)
|
real_name = int(name.lstrip('x'), 16)
|
||||||
elif name.startswith('X'):
|
elif name.startswith('X'):
|
||||||
|
@ -333,9 +300,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||||
parser_args = parser_args or []
|
parser_args = parser_args or []
|
||||||
parser_kwargs = parser_kwargs or {}
|
parser_kwargs = parser_kwargs or {}
|
||||||
parser_kwargs.update(extra_parser_kwargs)
|
parser_kwargs.update(extra_parser_kwargs)
|
||||||
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
|
|
||||||
parser_kwargs['strict'] = False
|
|
||||||
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
|
|
||||||
parser_kwargs['convert_charrefs'] = False
|
parser_kwargs['convert_charrefs'] = False
|
||||||
self.parser_args = (parser_args, parser_kwargs)
|
self.parser_args = (parser_args, parser_kwargs)
|
||||||
|
|
||||||
|
@ -395,105 +359,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||||
args, kwargs = self.parser_args
|
args, kwargs = self.parser_args
|
||||||
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
||||||
parser.soup = self.soup
|
parser.soup = self.soup
|
||||||
try:
|
|
||||||
parser.feed(markup)
|
parser.feed(markup)
|
||||||
parser.close()
|
parser.close()
|
||||||
except HTMLParseError as e:
|
|
||||||
warnings.warn(RuntimeWarning(
|
|
||||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
|
||||||
raise e
|
|
||||||
parser.already_closed_empty_element = []
|
parser.already_closed_empty_element = []
|
||||||
|
|
||||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
|
||||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
|
||||||
# string.
|
|
||||||
#
|
|
||||||
# XXX This code can be removed once most Python 3 users are on 3.2.3.
|
|
||||||
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
|
|
||||||
import re
|
|
||||||
attrfind_tolerant = re.compile(
|
|
||||||
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
|
||||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
|
|
||||||
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
|
|
||||||
|
|
||||||
locatestarttagend = re.compile(r"""
|
|
||||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
|
||||||
(?:\s+ # whitespace before attribute name
|
|
||||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
|
||||||
(?:\s*=\s* # value indicator
|
|
||||||
(?:'[^']*' # LITA-enclosed value
|
|
||||||
|\"[^\"]*\" # LIT-enclosed value
|
|
||||||
|[^'\">\s]+ # bare value
|
|
||||||
)
|
|
||||||
)?
|
|
||||||
)
|
|
||||||
)*
|
|
||||||
\s* # trailing whitespace
|
|
||||||
""", re.VERBOSE)
|
|
||||||
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
|
|
||||||
|
|
||||||
from html.parser import tagfind, attrfind
|
|
||||||
|
|
||||||
def parse_starttag(self, i):
|
|
||||||
self.__starttag_text = None
|
|
||||||
endpos = self.check_for_whole_start_tag(i)
|
|
||||||
if endpos < 0:
|
|
||||||
return endpos
|
|
||||||
rawdata = self.rawdata
|
|
||||||
self.__starttag_text = rawdata[i:endpos]
|
|
||||||
|
|
||||||
# Now parse the data between i+1 and j into a tag and attrs
|
|
||||||
attrs = []
|
|
||||||
match = tagfind.match(rawdata, i+1)
|
|
||||||
assert match, 'unexpected call to parse_starttag()'
|
|
||||||
k = match.end()
|
|
||||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
|
||||||
while k < endpos:
|
|
||||||
if self.strict:
|
|
||||||
m = attrfind.match(rawdata, k)
|
|
||||||
else:
|
|
||||||
m = attrfind_tolerant.match(rawdata, k)
|
|
||||||
if not m:
|
|
||||||
break
|
|
||||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
|
||||||
if not rest:
|
|
||||||
attrvalue = None
|
|
||||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
|
||||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
|
||||||
attrvalue = attrvalue[1:-1]
|
|
||||||
if attrvalue:
|
|
||||||
attrvalue = self.unescape(attrvalue)
|
|
||||||
attrs.append((attrname.lower(), attrvalue))
|
|
||||||
k = m.end()
|
|
||||||
|
|
||||||
end = rawdata[k:endpos].strip()
|
|
||||||
if end not in (">", "/>"):
|
|
||||||
lineno, offset = self.getpos()
|
|
||||||
if "\n" in self.__starttag_text:
|
|
||||||
lineno = lineno + self.__starttag_text.count("\n")
|
|
||||||
offset = len(self.__starttag_text) \
|
|
||||||
- self.__starttag_text.rfind("\n")
|
|
||||||
else:
|
|
||||||
offset = offset + len(self.__starttag_text)
|
|
||||||
if self.strict:
|
|
||||||
self.error("junk characters in start tag: %r"
|
|
||||||
% (rawdata[k:endpos][:20],))
|
|
||||||
self.handle_data(rawdata[i:endpos])
|
|
||||||
return endpos
|
|
||||||
if end.endswith('/>'):
|
|
||||||
# XHTML-style empty tag: <span attr="value" />
|
|
||||||
self.handle_startendtag(tag, attrs)
|
|
||||||
else:
|
|
||||||
self.handle_starttag(tag, attrs)
|
|
||||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
|
||||||
self.set_cdata_mode(tag)
|
|
||||||
return endpos
|
|
||||||
|
|
||||||
def set_cdata_mode(self, elem):
|
|
||||||
self.cdata_elem = elem.lower()
|
|
||||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
|
||||||
|
|
||||||
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
|
|
||||||
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
|
|
||||||
|
|
||||||
CONSTRUCTOR_TAKES_STRICT = True
|
|
||||||
|
|
|
@ -496,13 +496,16 @@ class PageElement(object):
|
||||||
def extend(self, tags):
|
def extend(self, tags):
|
||||||
"""Appends the given PageElements to this one's contents.
|
"""Appends the given PageElements to this one's contents.
|
||||||
|
|
||||||
:param tags: A list of PageElements.
|
:param tags: A list of PageElements. If a single Tag is
|
||||||
|
provided instead, this PageElement's contents will be extended
|
||||||
|
with that Tag's contents.
|
||||||
"""
|
"""
|
||||||
if isinstance(tags, Tag):
|
if isinstance(tags, Tag):
|
||||||
# Calling self.append() on another tag's contents will change
|
tags = tags.contents
|
||||||
# the list we're iterating over. Make a list that won't
|
if isinstance(tags, list):
|
||||||
# change.
|
# Moving items around the tree may change their position in
|
||||||
tags = list(tags.contents)
|
# the original list. Make a list that won't change.
|
||||||
|
tags = list(tags)
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
self.append(tag)
|
self.append(tag)
|
||||||
|
|
||||||
|
@ -586,8 +589,9 @@ class PageElement(object):
|
||||||
:kwargs: A dictionary of filters on attribute values.
|
:kwargs: A dictionary of filters on attribute values.
|
||||||
:return: A ResultSet containing PageElements.
|
:return: A ResultSet containing PageElements.
|
||||||
"""
|
"""
|
||||||
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
return self._find_all(name, attrs, string, limit, self.next_elements,
|
return self._find_all(name, attrs, string, limit, self.next_elements,
|
||||||
**kwargs)
|
_stacklevel=_stacklevel+1, **kwargs)
|
||||||
findAllNext = find_all_next # BS3
|
findAllNext = find_all_next # BS3
|
||||||
|
|
||||||
def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
|
def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
|
||||||
|
@ -624,8 +628,11 @@ class PageElement(object):
|
||||||
:return: A ResultSet of PageElements.
|
:return: A ResultSet of PageElements.
|
||||||
:rtype: bs4.element.ResultSet
|
:rtype: bs4.element.ResultSet
|
||||||
"""
|
"""
|
||||||
return self._find_all(name, attrs, string, limit,
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
self.next_siblings, **kwargs)
|
return self._find_all(
|
||||||
|
name, attrs, string, limit,
|
||||||
|
self.next_siblings, _stacklevel=_stacklevel+1, **kwargs
|
||||||
|
)
|
||||||
findNextSiblings = find_next_siblings # BS3
|
findNextSiblings = find_next_siblings # BS3
|
||||||
fetchNextSiblings = find_next_siblings # BS2
|
fetchNextSiblings = find_next_siblings # BS2
|
||||||
|
|
||||||
|
@ -663,8 +670,11 @@ class PageElement(object):
|
||||||
:return: A ResultSet of PageElements.
|
:return: A ResultSet of PageElements.
|
||||||
:rtype: bs4.element.ResultSet
|
:rtype: bs4.element.ResultSet
|
||||||
"""
|
"""
|
||||||
return self._find_all(name, attrs, string, limit, self.previous_elements,
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
**kwargs)
|
return self._find_all(
|
||||||
|
name, attrs, string, limit, self.previous_elements,
|
||||||
|
_stacklevel=_stacklevel+1, **kwargs
|
||||||
|
)
|
||||||
findAllPrevious = find_all_previous # BS3
|
findAllPrevious = find_all_previous # BS3
|
||||||
fetchPrevious = find_all_previous # BS2
|
fetchPrevious = find_all_previous # BS2
|
||||||
|
|
||||||
|
@ -702,8 +712,11 @@ class PageElement(object):
|
||||||
:return: A ResultSet of PageElements.
|
:return: A ResultSet of PageElements.
|
||||||
:rtype: bs4.element.ResultSet
|
:rtype: bs4.element.ResultSet
|
||||||
"""
|
"""
|
||||||
return self._find_all(name, attrs, string, limit,
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
self.previous_siblings, **kwargs)
|
return self._find_all(
|
||||||
|
name, attrs, string, limit,
|
||||||
|
self.previous_siblings, _stacklevel=_stacklevel+1, **kwargs
|
||||||
|
)
|
||||||
findPreviousSiblings = find_previous_siblings # BS3
|
findPreviousSiblings = find_previous_siblings # BS3
|
||||||
fetchPreviousSiblings = find_previous_siblings # BS2
|
fetchPreviousSiblings = find_previous_siblings # BS2
|
||||||
|
|
||||||
|
@ -724,7 +737,7 @@ class PageElement(object):
|
||||||
# NOTE: We can't use _find_one because findParents takes a different
|
# NOTE: We can't use _find_one because findParents takes a different
|
||||||
# set of arguments.
|
# set of arguments.
|
||||||
r = None
|
r = None
|
||||||
l = self.find_parents(name, attrs, 1, **kwargs)
|
l = self.find_parents(name, attrs, 1, _stacklevel=3, **kwargs)
|
||||||
if l:
|
if l:
|
||||||
r = l[0]
|
r = l[0]
|
||||||
return r
|
return r
|
||||||
|
@ -744,8 +757,9 @@ class PageElement(object):
|
||||||
:return: A PageElement.
|
:return: A PageElement.
|
||||||
:rtype: bs4.element.Tag | bs4.element.NavigableString
|
:rtype: bs4.element.Tag | bs4.element.NavigableString
|
||||||
"""
|
"""
|
||||||
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
return self._find_all(name, attrs, None, limit, self.parents,
|
return self._find_all(name, attrs, None, limit, self.parents,
|
||||||
**kwargs)
|
_stacklevel=_stacklevel+1, **kwargs)
|
||||||
findParents = find_parents # BS3
|
findParents = find_parents # BS3
|
||||||
fetchParents = find_parents # BS2
|
fetchParents = find_parents # BS2
|
||||||
|
|
||||||
|
@ -771,19 +785,20 @@ class PageElement(object):
|
||||||
|
|
||||||
def _find_one(self, method, name, attrs, string, **kwargs):
|
def _find_one(self, method, name, attrs, string, **kwargs):
|
||||||
r = None
|
r = None
|
||||||
l = method(name, attrs, string, 1, **kwargs)
|
l = method(name, attrs, string, 1, _stacklevel=4, **kwargs)
|
||||||
if l:
|
if l:
|
||||||
r = l[0]
|
r = l[0]
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def _find_all(self, name, attrs, string, limit, generator, **kwargs):
|
def _find_all(self, name, attrs, string, limit, generator, **kwargs):
|
||||||
"Iterates over a generator looking for things that match."
|
"Iterates over a generator looking for things that match."
|
||||||
|
_stacklevel = kwargs.pop('_stacklevel', 3)
|
||||||
|
|
||||||
if string is None and 'text' in kwargs:
|
if string is None and 'text' in kwargs:
|
||||||
string = kwargs.pop('text')
|
string = kwargs.pop('text')
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
|
"The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=_stacklevel
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(name, SoupStrainer):
|
if isinstance(name, SoupStrainer):
|
||||||
|
@ -1306,7 +1321,8 @@ class Tag(PageElement):
|
||||||
sourceline=self.sourceline, sourcepos=self.sourcepos,
|
sourceline=self.sourceline, sourcepos=self.sourcepos,
|
||||||
can_be_empty_element=self.can_be_empty_element,
|
can_be_empty_element=self.can_be_empty_element,
|
||||||
cdata_list_attributes=self.cdata_list_attributes,
|
cdata_list_attributes=self.cdata_list_attributes,
|
||||||
preserve_whitespace_tags=self.preserve_whitespace_tags
|
preserve_whitespace_tags=self.preserve_whitespace_tags,
|
||||||
|
interesting_string_types=self.interesting_string_types
|
||||||
)
|
)
|
||||||
for attr in ('can_be_empty_element', 'hidden'):
|
for attr in ('can_be_empty_element', 'hidden'):
|
||||||
setattr(clone, attr, getattr(self, attr))
|
setattr(clone, attr, getattr(self, attr))
|
||||||
|
@ -1558,7 +1574,7 @@ class Tag(PageElement):
|
||||||
'.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
|
'.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
|
||||||
name=tag_name
|
name=tag_name
|
||||||
),
|
),
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=2
|
||||||
)
|
)
|
||||||
return self.find(tag_name)
|
return self.find(tag_name)
|
||||||
# We special case contents to avoid recursion.
|
# We special case contents to avoid recursion.
|
||||||
|
@ -1862,7 +1878,8 @@ class Tag(PageElement):
|
||||||
:rtype: bs4.element.Tag | bs4.element.NavigableString
|
:rtype: bs4.element.Tag | bs4.element.NavigableString
|
||||||
"""
|
"""
|
||||||
r = None
|
r = None
|
||||||
l = self.find_all(name, attrs, recursive, string, 1, **kwargs)
|
l = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3,
|
||||||
|
**kwargs)
|
||||||
if l:
|
if l:
|
||||||
r = l[0]
|
r = l[0]
|
||||||
return r
|
return r
|
||||||
|
@ -1889,7 +1906,9 @@ class Tag(PageElement):
|
||||||
generator = self.descendants
|
generator = self.descendants
|
||||||
if not recursive:
|
if not recursive:
|
||||||
generator = self.children
|
generator = self.children
|
||||||
return self._find_all(name, attrs, string, limit, generator, **kwargs)
|
_stacklevel = kwargs.pop('_stacklevel', 2)
|
||||||
|
return self._find_all(name, attrs, string, limit, generator,
|
||||||
|
_stacklevel=_stacklevel+1, **kwargs)
|
||||||
findAll = find_all # BS3
|
findAll = find_all # BS3
|
||||||
findChildren = find_all # BS2
|
findChildren = find_all # BS2
|
||||||
|
|
||||||
|
@ -1993,7 +2012,7 @@ class Tag(PageElement):
|
||||||
"""
|
"""
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
'has_key is deprecated. Use has_attr(key) instead.',
|
'has_key is deprecated. Use has_attr(key) instead.',
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=2
|
||||||
)
|
)
|
||||||
return self.has_attr(key)
|
return self.has_attr(key)
|
||||||
|
|
||||||
|
@ -2024,7 +2043,7 @@ class SoupStrainer(object):
|
||||||
string = kwargs.pop('text')
|
string = kwargs.pop('text')
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
|
"The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
|
||||||
DeprecationWarning
|
DeprecationWarning, stacklevel=2
|
||||||
)
|
)
|
||||||
|
|
||||||
self.name = self._normalize_search_value(name)
|
self.name = self._normalize_search_value(name)
|
||||||
|
|
|
@ -149,14 +149,14 @@ class HTMLFormatter(Formatter):
|
||||||
"""A generic Formatter for HTML."""
|
"""A generic Formatter for HTML."""
|
||||||
REGISTRY = {}
|
REGISTRY = {}
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
|
super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class XMLFormatter(Formatter):
|
class XMLFormatter(Formatter):
|
||||||
"""A generic Formatter for XML."""
|
"""A generic Formatter for XML."""
|
||||||
REGISTRY = {}
|
REGISTRY = {}
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
|
super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# Set up aliases for the default formatters.
|
# Set up aliases for the default formatters.
|
||||||
|
|
|
@ -29,6 +29,29 @@ from bs4.builder import (
|
||||||
)
|
)
|
||||||
default_builder = HTMLParserTreeBuilder
|
default_builder = HTMLParserTreeBuilder
|
||||||
|
|
||||||
|
# Some tests depend on specific third-party libraries. We use
|
||||||
|
# @pytest.mark.skipIf on the following conditionals to skip them
|
||||||
|
# if the libraries are not installed.
|
||||||
|
try:
|
||||||
|
from soupsieve import SelectorSyntaxError
|
||||||
|
SOUP_SIEVE_PRESENT = True
|
||||||
|
except ImportError:
|
||||||
|
SOUP_SIEVE_PRESENT = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import html5lib
|
||||||
|
HTML5LIB_PRESENT = True
|
||||||
|
except ImportError:
|
||||||
|
HTML5LIB_PRESENT = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import lxml.etree
|
||||||
|
LXML_PRESENT = True
|
||||||
|
LXML_VERSION = lxml.etree.LXML_VERSION
|
||||||
|
except ImportError:
|
||||||
|
LXML_PRESENT = False
|
||||||
|
LXML_VERSION = (0,)
|
||||||
|
|
||||||
BAD_DOCUMENT = """A bare string
|
BAD_DOCUMENT = """A bare string
|
||||||
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
|
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
|
||||||
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
|
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
|
||||||
|
@ -258,10 +281,10 @@ class TreeBuilderSmokeTest(object):
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"multi_valued_attributes",
|
"multi_valued_attributes",
|
||||||
[None, dict(b=['class']), {'*': ['notclass']}]
|
[None, {}, dict(b=['class']), {'*': ['notclass']}]
|
||||||
)
|
)
|
||||||
def test_attribute_not_multi_valued(self, multi_valued_attributes):
|
def test_attribute_not_multi_valued(self, multi_valued_attributes):
|
||||||
markup = '<a class="a b c">'
|
markup = '<html xmlns="http://www.w3.org/1999/xhtml"><a class="a b c"></html>'
|
||||||
soup = self.soup(markup, multi_valued_attributes=multi_valued_attributes)
|
soup = self.soup(markup, multi_valued_attributes=multi_valued_attributes)
|
||||||
assert soup.a['class'] == 'a b c'
|
assert soup.a['class'] == 'a b c'
|
||||||
|
|
||||||
|
@ -820,26 +843,27 @@ Hello, world!
|
||||||
soup = self.soup(text)
|
soup = self.soup(text)
|
||||||
assert soup.p.encode("utf-8") == expected
|
assert soup.p.encode("utf-8") == expected
|
||||||
|
|
||||||
def test_real_iso_latin_document(self):
|
def test_real_iso_8859_document(self):
|
||||||
# Smoke test of interrelated functionality, using an
|
# Smoke test of interrelated functionality, using an
|
||||||
# easy-to-understand document.
|
# easy-to-understand document.
|
||||||
|
|
||||||
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
|
# Here it is in Unicode. Note that it claims to be in ISO-8859-1.
|
||||||
unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
|
unicode_html = '<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
|
||||||
|
|
||||||
# That's because we're going to encode it into ISO-Latin-1, and use
|
# That's because we're going to encode it into ISO-8859-1,
|
||||||
# that to test.
|
# and use that to test.
|
||||||
iso_latin_html = unicode_html.encode("iso-8859-1")
|
iso_latin_html = unicode_html.encode("iso-8859-1")
|
||||||
|
|
||||||
# Parse the ISO-Latin-1 HTML.
|
# Parse the ISO-8859-1 HTML.
|
||||||
soup = self.soup(iso_latin_html)
|
soup = self.soup(iso_latin_html)
|
||||||
|
|
||||||
# Encode it to UTF-8.
|
# Encode it to UTF-8.
|
||||||
result = soup.encode("utf-8")
|
result = soup.encode("utf-8")
|
||||||
|
|
||||||
# What do we expect the result to look like? Well, it would
|
# What do we expect the result to look like? Well, it would
|
||||||
# look like unicode_html, except that the META tag would say
|
# look like unicode_html, except that the META tag would say
|
||||||
# UTF-8 instead of ISO-Latin-1.
|
# UTF-8 instead of ISO-8859-1.
|
||||||
expected = unicode_html.replace("ISO-Latin-1", "utf-8")
|
expected = unicode_html.replace("ISO-8859-1", "utf-8")
|
||||||
|
|
||||||
# And, of course, it would be in UTF-8, not Unicode.
|
# And, of course, it would be in UTF-8, not Unicode.
|
||||||
expected = expected.encode("utf-8")
|
expected = expected.encode("utf-8")
|
||||||
|
@ -1177,15 +1201,3 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||||
assert isinstance(soup.contents[0], Comment)
|
assert isinstance(soup.contents[0], Comment)
|
||||||
assert soup.contents[0] == '?xml version="1.0" encoding="utf-8"?'
|
assert soup.contents[0] == '?xml version="1.0" encoding="utf-8"?'
|
||||||
assert "html" == soup.contents[0].next_element.name
|
assert "html" == soup.contents[0].next_element.name
|
||||||
|
|
||||||
def skipIf(condition, reason):
|
|
||||||
def nothing(test, *args, **kwargs):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def decorator(test_item):
|
|
||||||
if condition:
|
|
||||||
return nothing
|
|
||||||
else:
|
|
||||||
return test_item
|
|
||||||
|
|
||||||
return decorator
|
|
||||||
|
|
|
@ -10,22 +10,23 @@ from bs4.builder import (
|
||||||
TreeBuilderRegistry,
|
TreeBuilderRegistry,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
from . import (
|
||||||
from bs4.builder import HTML5TreeBuilder
|
HTML5LIB_PRESENT,
|
||||||
HTML5LIB_PRESENT = True
|
LXML_PRESENT,
|
||||||
except ImportError:
|
)
|
||||||
HTML5LIB_PRESENT = False
|
|
||||||
|
|
||||||
try:
|
if HTML5LIB_PRESENT:
|
||||||
|
from bs4.builder import HTML5TreeBuilder
|
||||||
|
|
||||||
|
if LXML_PRESENT:
|
||||||
from bs4.builder import (
|
from bs4.builder import (
|
||||||
LXMLTreeBuilderForXML,
|
LXMLTreeBuilderForXML,
|
||||||
LXMLTreeBuilder,
|
LXMLTreeBuilder,
|
||||||
)
|
)
|
||||||
LXML_PRESENT = True
|
|
||||||
except ImportError:
|
|
||||||
LXML_PRESENT = False
|
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Split out the lxml and html5lib tests into their own classes
|
||||||
|
# and gate with pytest.mark.skipIf.
|
||||||
class TestBuiltInRegistry(object):
|
class TestBuiltInRegistry(object):
|
||||||
"""Test the built-in registry with the default builders registered."""
|
"""Test the built-in registry with the default builders registered."""
|
||||||
|
|
||||||
|
|
|
@ -17,25 +17,23 @@ class TestUnicodeDammit(object):
|
||||||
dammit = UnicodeDammit(markup)
|
dammit = UnicodeDammit(markup)
|
||||||
assert dammit.unicode_markup == markup
|
assert dammit.unicode_markup == markup
|
||||||
|
|
||||||
def test_smart_quotes_to_unicode(self):
|
@pytest.mark.parametrize(
|
||||||
|
"smart_quotes_to,expect_converted",
|
||||||
|
[(None, "\u2018\u2019\u201c\u201d"),
|
||||||
|
("xml", "‘’“”"),
|
||||||
|
("html", "‘’“”"),
|
||||||
|
("ascii", "''" + '""'),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_smart_quotes_to(self, smart_quotes_to, expect_converted):
|
||||||
|
"""Verify the functionality of the smart_quotes_to argument
|
||||||
|
to the UnicodeDammit constructor."""
|
||||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||||
dammit = UnicodeDammit(markup)
|
converted = UnicodeDammit(
|
||||||
assert dammit.unicode_markup == "<foo>\u2018\u2019\u201c\u201d</foo>"
|
markup, known_definite_encodings=["windows-1252"],
|
||||||
|
smart_quotes_to=smart_quotes_to
|
||||||
def test_smart_quotes_to_xml_entities(self):
|
).unicode_markup
|
||||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
assert converted == "<foo>{}</foo>".format(expect_converted)
|
||||||
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
|
|
||||||
assert dammit.unicode_markup == "<foo>‘’“”</foo>"
|
|
||||||
|
|
||||||
def test_smart_quotes_to_html_entities(self):
|
|
||||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
|
||||||
dammit = UnicodeDammit(markup, smart_quotes_to="html")
|
|
||||||
assert dammit.unicode_markup == "<foo>‘’“”</foo>"
|
|
||||||
|
|
||||||
def test_smart_quotes_to_ascii(self):
|
|
||||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
|
||||||
dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
|
|
||||||
assert dammit.unicode_markup == """<foo>''""</foo>"""
|
|
||||||
|
|
||||||
def test_detect_utf8(self):
|
def test_detect_utf8(self):
|
||||||
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
|
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
|
||||||
|
@ -275,23 +273,24 @@ class TestEntitySubstitution(object):
|
||||||
def setup_method(self):
|
def setup_method(self):
|
||||||
self.sub = EntitySubstitution
|
self.sub = EntitySubstitution
|
||||||
|
|
||||||
def test_simple_html_substitution(self):
|
|
||||||
# Unicode characters corresponding to named HTML entites
|
|
||||||
# are substituted, and no others.
|
|
||||||
s = "foo\u2200\N{SNOWMAN}\u00f5bar"
|
|
||||||
assert self.sub.substitute_html(s) == "foo∀\N{SNOWMAN}õbar"
|
|
||||||
|
|
||||||
def test_smart_quote_substitution(self):
|
@pytest.mark.parametrize(
|
||||||
|
"original,substituted",
|
||||||
|
[
|
||||||
|
# Basic case. Unicode characters corresponding to named
|
||||||
|
# HTML entites are substituted; others are not.
|
||||||
|
("foo\u2200\N{SNOWMAN}\u00f5bar",
|
||||||
|
"foo∀\N{SNOWMAN}õbar"),
|
||||||
|
|
||||||
# MS smart quotes are a common source of frustration, so we
|
# MS smart quotes are a common source of frustration, so we
|
||||||
# give them a special test.
|
# give them a special test.
|
||||||
quotes = b"\x91\x92foo\x93\x94"
|
('‘’foo“”', "‘’foo“”"),
|
||||||
dammit = UnicodeDammit(quotes)
|
]
|
||||||
assert self.sub.substitute_html(dammit.markup) == "‘’foo“”"
|
)
|
||||||
|
def test_substitute_html(self, original, substituted):
|
||||||
|
assert self.sub.substitute_html(original) == substituted
|
||||||
|
|
||||||
def test_html5_entity(self):
|
def test_html5_entity(self):
|
||||||
# Some HTML5 entities correspond to single- or multi-character
|
|
||||||
# Unicode sequences.
|
|
||||||
|
|
||||||
for entity, u in (
|
for entity, u in (
|
||||||
# A few spot checks of our ability to recognize
|
# A few spot checks of our ability to recognize
|
||||||
# special character sequences and convert them
|
# special character sequences and convert them
|
||||||
|
|
|
@ -1,27 +1,26 @@
|
||||||
"""Tests to ensure that the html5lib tree builder generates good trees."""
|
"""Tests to ensure that the html5lib tree builder generates good trees."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
try:
|
from bs4 import BeautifulSoup
|
||||||
from bs4.builder import HTML5TreeBuilder
|
|
||||||
HTML5LIB_PRESENT = True
|
|
||||||
except ImportError as e:
|
|
||||||
HTML5LIB_PRESENT = False
|
|
||||||
from bs4.element import SoupStrainer
|
from bs4.element import SoupStrainer
|
||||||
from . import (
|
from . import (
|
||||||
|
HTML5LIB_PRESENT,
|
||||||
HTML5TreeBuilderSmokeTest,
|
HTML5TreeBuilderSmokeTest,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@skipIf(
|
@pytest.mark.skipif(
|
||||||
not HTML5LIB_PRESENT,
|
not HTML5LIB_PRESENT,
|
||||||
"html5lib seems not to be present, not testing its tree builder.")
|
reason="html5lib seems not to be present, not testing its tree builder."
|
||||||
|
)
|
||||||
class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest):
|
class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||||
"""See ``HTML5TreeBuilderSmokeTest``."""
|
"""See ``HTML5TreeBuilderSmokeTest``."""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def default_builder(self):
|
def default_builder(self):
|
||||||
|
from bs4.builder import HTML5TreeBuilder
|
||||||
return HTML5TreeBuilder
|
return HTML5TreeBuilder
|
||||||
|
|
||||||
def test_soupstrainer(self):
|
def test_soupstrainer(self):
|
||||||
|
@ -29,10 +28,12 @@ class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||||
strainer = SoupStrainer("b")
|
strainer = SoupStrainer("b")
|
||||||
markup = "<p>A <b>bold</b> statement.</p>"
|
markup = "<p>A <b>bold</b> statement.</p>"
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
soup = self.soup(markup, parse_only=strainer)
|
soup = BeautifulSoup(markup, "html5lib", parse_only=strainer)
|
||||||
assert soup.decode() == self.document_for(markup)
|
assert soup.decode() == self.document_for(markup)
|
||||||
|
|
||||||
assert "the html5lib tree builder doesn't support parse_only" in str(w[0].message)
|
[warning] = w
|
||||||
|
assert warning.filename == __file__
|
||||||
|
assert "the html5lib tree builder doesn't support parse_only" in str(warning.message)
|
||||||
|
|
||||||
def test_correctly_nested_tables(self):
|
def test_correctly_nested_tables(self):
|
||||||
"""html5lib inserts <tbody> tags where other parsers don't."""
|
"""html5lib inserts <tbody> tags where other parsers don't."""
|
||||||
|
|
|
@ -122,15 +122,3 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
with_element = div.encode(formatter="html")
|
with_element = div.encode(formatter="html")
|
||||||
expect = b"<div>%s</div>" % output_element
|
expect = b"<div>%s</div>" % output_element
|
||||||
assert with_element == expect
|
assert with_element == expect
|
||||||
|
|
||||||
class TestHTMLParserSubclass(SoupTest):
|
|
||||||
def test_error(self):
|
|
||||||
"""Verify that our HTMLParser subclass implements error() in a way
|
|
||||||
that doesn't cause a crash.
|
|
||||||
"""
|
|
||||||
parser = BeautifulSoupHTMLParser()
|
|
||||||
with warnings.catch_warnings(record=True) as warns:
|
|
||||||
parser.error("don't crash")
|
|
||||||
[warning] = warns
|
|
||||||
assert "don't crash" == str(warning.message)
|
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,10 @@
|
||||||
"""Tests to ensure that the lxml tree builder generates good trees."""
|
"""Tests to ensure that the lxml tree builder generates good trees."""
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
|
import pytest
|
||||||
import re
|
import re
|
||||||
import warnings
|
import warnings
|
||||||
|
from . import LXML_PRESENT, LXML_VERSION
|
||||||
try:
|
|
||||||
import lxml.etree
|
|
||||||
LXML_PRESENT = True
|
|
||||||
LXML_VERSION = lxml.etree.LXML_VERSION
|
|
||||||
except ImportError as e:
|
|
||||||
LXML_PRESENT = False
|
|
||||||
LXML_VERSION = (0,)
|
|
||||||
|
|
||||||
if LXML_PRESENT:
|
if LXML_PRESENT:
|
||||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||||
|
@ -23,13 +17,14 @@ from bs4.element import Comment, Doctype, SoupStrainer
|
||||||
from . import (
|
from . import (
|
||||||
HTMLTreeBuilderSmokeTest,
|
HTMLTreeBuilderSmokeTest,
|
||||||
XMLTreeBuilderSmokeTest,
|
XMLTreeBuilderSmokeTest,
|
||||||
|
SOUP_SIEVE_PRESENT,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@skipIf(
|
@pytest.mark.skipif(
|
||||||
not LXML_PRESENT,
|
not LXML_PRESENT,
|
||||||
"lxml seems not to be present, not testing its tree builder.")
|
reason="lxml seems not to be present, not testing its tree builder."
|
||||||
|
)
|
||||||
class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||||
|
|
||||||
|
@ -54,9 +49,10 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
|
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
|
||||||
# test if an old version of lxml is installed.
|
# test if an old version of lxml is installed.
|
||||||
|
|
||||||
@skipIf(
|
@pytest.mark.skipif(
|
||||||
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
|
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
|
||||||
"Skipping doctype test for old version of lxml to avoid segfault.")
|
reason="Skipping doctype test for old version of lxml to avoid segfault."
|
||||||
|
)
|
||||||
def test_empty_doctype(self):
|
def test_empty_doctype(self):
|
||||||
soup = self.soup("<!DOCTYPE>")
|
soup = self.soup("<!DOCTYPE>")
|
||||||
doctype = soup.contents[0]
|
doctype = soup.contents[0]
|
||||||
|
@ -68,7 +64,9 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
soup = BeautifulStoneSoup("<b />")
|
soup = BeautifulStoneSoup("<b />")
|
||||||
assert "<b/>" == str(soup.b)
|
assert "<b/>" == str(soup.b)
|
||||||
assert "BeautifulStoneSoup class is deprecated" in str(w[0].message)
|
[warning] = w
|
||||||
|
assert warning.filename == __file__
|
||||||
|
assert "BeautifulStoneSoup class is deprecated" in str(warning.message)
|
||||||
|
|
||||||
def test_tracking_line_numbers(self):
|
def test_tracking_line_numbers(self):
|
||||||
# The lxml TreeBuilder cannot keep track of line numbers from
|
# The lxml TreeBuilder cannot keep track of line numbers from
|
||||||
|
@ -85,9 +83,10 @@ class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
assert "sourceline" == soup.p.sourceline.name
|
assert "sourceline" == soup.p.sourceline.name
|
||||||
assert "sourcepos" == soup.p.sourcepos.name
|
assert "sourcepos" == soup.p.sourcepos.name
|
||||||
|
|
||||||
@skipIf(
|
@pytest.mark.skipif(
|
||||||
not LXML_PRESENT,
|
not LXML_PRESENT,
|
||||||
"lxml seems not to be present, not testing its XML tree builder.")
|
reason="lxml seems not to be present, not testing its XML tree builder."
|
||||||
|
)
|
||||||
class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
|
class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
|
||||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||||
|
|
||||||
|
@ -148,6 +147,9 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed"
|
||||||
|
)
|
||||||
def test_namespace_interaction_with_select_and_find(self):
|
def test_namespace_interaction_with_select_and_find(self):
|
||||||
# Demonstrate how namespaces interact with select* and
|
# Demonstrate how namespaces interact with select* and
|
||||||
# find* methods.
|
# find* methods.
|
||||||
|
|
|
@ -3,15 +3,18 @@ import copy
|
||||||
import pickle
|
import pickle
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from soupsieve import SelectorSyntaxError
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import (
|
from bs4.element import (
|
||||||
Comment,
|
Comment,
|
||||||
SoupStrainer,
|
SoupStrainer,
|
||||||
)
|
)
|
||||||
from . import SoupTest
|
from . import (
|
||||||
|
SoupTest,
|
||||||
|
SOUP_SIEVE_PRESENT,
|
||||||
|
)
|
||||||
|
|
||||||
|
if SOUP_SIEVE_PRESENT:
|
||||||
|
from soupsieve import SelectorSyntaxError
|
||||||
|
|
||||||
class TestEncoding(SoupTest):
|
class TestEncoding(SoupTest):
|
||||||
"""Test the ability to encode objects into strings."""
|
"""Test the ability to encode objects into strings."""
|
||||||
|
@ -213,6 +216,7 @@ class TestFormatters(SoupTest):
|
||||||
assert soup.contents[0].name == 'pre'
|
assert soup.contents[0].name == 'pre'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
|
||||||
class TestCSSSelectors(SoupTest):
|
class TestCSSSelectors(SoupTest):
|
||||||
"""Test basic CSS selector functionality.
|
"""Test basic CSS selector functionality.
|
||||||
|
|
||||||
|
@ -694,6 +698,7 @@ class TestPersistence(SoupTest):
|
||||||
assert tag.can_be_empty_element == copied.can_be_empty_element
|
assert tag.can_be_empty_element == copied.can_be_empty_element
|
||||||
assert tag.cdata_list_attributes == copied.cdata_list_attributes
|
assert tag.cdata_list_attributes == copied.cdata_list_attributes
|
||||||
assert tag.preserve_whitespace_tags == copied.preserve_whitespace_tags
|
assert tag.preserve_whitespace_tags == copied.preserve_whitespace_tags
|
||||||
|
assert tag.interesting_string_types == copied.interesting_string_types
|
||||||
|
|
||||||
def test_unicode_pickle(self):
|
def test_unicode_pickle(self):
|
||||||
# A tree containing Unicode characters can be pickled.
|
# A tree containing Unicode characters can be pickled.
|
||||||
|
|
|
@ -30,19 +30,11 @@ from bs4.element import (
|
||||||
|
|
||||||
from . import (
|
from . import (
|
||||||
default_builder,
|
default_builder,
|
||||||
|
LXML_PRESENT,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
|
||||||
)
|
)
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
try:
|
|
||||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
|
||||||
LXML_PRESENT = True
|
|
||||||
except ImportError as e:
|
|
||||||
LXML_PRESENT = False
|
|
||||||
|
|
||||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
|
||||||
|
|
||||||
class TestConstructor(SoupTest):
|
class TestConstructor(SoupTest):
|
||||||
|
|
||||||
def test_short_unicode_input(self):
|
def test_short_unicode_input(self):
|
||||||
|
@ -139,7 +131,7 @@ class TestConstructor(SoupTest):
|
||||||
assert " an id " == a['id']
|
assert " an id " == a['id']
|
||||||
assert ["a", "class"] == a['class']
|
assert ["a", "class"] == a['class']
|
||||||
|
|
||||||
# TreeBuilder takes an argument called 'mutli_valued_attributes' which lets
|
# TreeBuilder takes an argument called 'multi_valued_attributes' which lets
|
||||||
# you customize or disable this. As always, you can customize the TreeBuilder
|
# you customize or disable this. As always, you can customize the TreeBuilder
|
||||||
# by passing in a keyword argument to the BeautifulSoup constructor.
|
# by passing in a keyword argument to the BeautifulSoup constructor.
|
||||||
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
|
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
|
||||||
|
@ -219,10 +211,17 @@ class TestConstructor(SoupTest):
|
||||||
|
|
||||||
|
|
||||||
class TestWarnings(SoupTest):
|
class TestWarnings(SoupTest):
|
||||||
|
# Note that some of the tests in this class create BeautifulSoup
|
||||||
|
# objects directly rather than using self.soup(). That's
|
||||||
|
# because SoupTest.soup is defined in a different file,
|
||||||
|
# which will throw off the assertion in _assert_warning
|
||||||
|
# that the code that triggered the warning is in the same
|
||||||
|
# file as the test.
|
||||||
|
|
||||||
def _assert_warning(self, warnings, cls):
|
def _assert_warning(self, warnings, cls):
|
||||||
for w in warnings:
|
for w in warnings:
|
||||||
if isinstance(w.message, cls):
|
if isinstance(w.message, cls):
|
||||||
|
assert w.filename == __file__
|
||||||
return w
|
return w
|
||||||
raise Exception("%s warning not found in %r" % (cls, warnings))
|
raise Exception("%s warning not found in %r" % (cls, warnings))
|
||||||
|
|
||||||
|
@ -243,13 +242,17 @@ class TestWarnings(SoupTest):
|
||||||
|
|
||||||
def test_no_warning_if_explicit_parser_specified(self):
|
def test_no_warning_if_explicit_parser_specified(self):
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
soup = BeautifulSoup("<a><b></b></a>", "html.parser")
|
soup = self.soup("<a><b></b></a>")
|
||||||
assert [] == w
|
assert [] == w
|
||||||
|
|
||||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
|
soup = BeautifulSoup(
|
||||||
msg = str(w[0].message)
|
"<a><b></b></a>", "html.parser",
|
||||||
|
parseOnlyThese=SoupStrainer("b"),
|
||||||
|
)
|
||||||
|
warning = self._assert_warning(w, DeprecationWarning)
|
||||||
|
msg = str(warning.message)
|
||||||
assert "parseOnlyThese" in msg
|
assert "parseOnlyThese" in msg
|
||||||
assert "parse_only" in msg
|
assert "parse_only" in msg
|
||||||
assert b"<b></b>" == soup.encode()
|
assert b"<b></b>" == soup.encode()
|
||||||
|
@ -257,8 +260,11 @@ class TestWarnings(SoupTest):
|
||||||
def test_fromEncoding_renamed_to_from_encoding(self):
|
def test_fromEncoding_renamed_to_from_encoding(self):
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
utf8 = b"\xc3\xa9"
|
utf8 = b"\xc3\xa9"
|
||||||
soup = self.soup(utf8, fromEncoding="utf8")
|
soup = BeautifulSoup(
|
||||||
msg = str(w[0].message)
|
utf8, "html.parser", fromEncoding="utf8"
|
||||||
|
)
|
||||||
|
warning = self._assert_warning(w, DeprecationWarning)
|
||||||
|
msg = str(warning.message)
|
||||||
assert "fromEncoding" in msg
|
assert "fromEncoding" in msg
|
||||||
assert "from_encoding" in msg
|
assert "from_encoding" in msg
|
||||||
assert "utf8" == soup.original_encoding
|
assert "utf8" == soup.original_encoding
|
||||||
|
@ -276,7 +282,7 @@ class TestWarnings(SoupTest):
|
||||||
# A warning is issued if the "markup" looks like the name of
|
# A warning is issued if the "markup" looks like the name of
|
||||||
# an HTML or text file, or a full path to a file on disk.
|
# an HTML or text file, or a full path to a file on disk.
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
soup = self.soup("markup" + extension)
|
soup = BeautifulSoup("markup" + extension, "html.parser")
|
||||||
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
|
warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
|
||||||
assert "looks more like a filename" in str(warning.message)
|
assert "looks more like a filename" in str(warning.message)
|
||||||
|
|
||||||
|
@ -295,7 +301,7 @@ class TestWarnings(SoupTest):
|
||||||
def test_url_warning_with_bytes_url(self):
|
def test_url_warning_with_bytes_url(self):
|
||||||
url = b"http://www.crummybytes.com/"
|
url = b"http://www.crummybytes.com/"
|
||||||
with warnings.catch_warnings(record=True) as warning_list:
|
with warnings.catch_warnings(record=True) as warning_list:
|
||||||
soup = self.soup(url)
|
soup = BeautifulSoup(url, "html.parser")
|
||||||
warning = self._assert_warning(
|
warning = self._assert_warning(
|
||||||
warning_list, MarkupResemblesLocatorWarning
|
warning_list, MarkupResemblesLocatorWarning
|
||||||
)
|
)
|
||||||
|
@ -307,7 +313,7 @@ class TestWarnings(SoupTest):
|
||||||
with warnings.catch_warnings(record=True) as warning_list:
|
with warnings.catch_warnings(record=True) as warning_list:
|
||||||
# note - this url must differ from the bytes one otherwise
|
# note - this url must differ from the bytes one otherwise
|
||||||
# python's warnings system swallows the second warning
|
# python's warnings system swallows the second warning
|
||||||
soup = self.soup(url)
|
soup = BeautifulSoup(url, "html.parser")
|
||||||
warning = self._assert_warning(
|
warning = self._assert_warning(
|
||||||
warning_list, MarkupResemblesLocatorWarning
|
warning_list, MarkupResemblesLocatorWarning
|
||||||
)
|
)
|
||||||
|
@ -348,9 +354,12 @@ class TestNewTag(SoupTest):
|
||||||
assert dict(bar="baz", name="a name") == new_tag.attrs
|
assert dict(bar="baz", name="a name") == new_tag.attrs
|
||||||
assert None == new_tag.parent
|
assert None == new_tag.parent
|
||||||
|
|
||||||
def test_tag_inherits_self_closing_rules_from_builder(self):
|
@pytest.mark.skipif(
|
||||||
if LXML_PRESENT:
|
not LXML_PRESENT,
|
||||||
xml_soup = BeautifulSoup("", "lxml-xml")
|
reason="lxml not installed, cannot parse XML document"
|
||||||
|
)
|
||||||
|
def test_xml_tag_inherits_self_closing_rules_from_builder(self):
|
||||||
|
xml_soup = BeautifulSoup("", "xml")
|
||||||
xml_br = xml_soup.new_tag("br")
|
xml_br = xml_soup.new_tag("br")
|
||||||
xml_p = xml_soup.new_tag("p")
|
xml_p = xml_soup.new_tag("p")
|
||||||
|
|
||||||
|
@ -359,6 +368,7 @@ class TestNewTag(SoupTest):
|
||||||
assert b"<br/>" == xml_br.encode()
|
assert b"<br/>" == xml_br.encode()
|
||||||
assert b"<p/>" == xml_p.encode()
|
assert b"<p/>" == xml_p.encode()
|
||||||
|
|
||||||
|
def test_tag_inherits_self_closing_rules_from_builder(self):
|
||||||
html_soup = BeautifulSoup("", "html.parser")
|
html_soup = BeautifulSoup("", "html.parser")
|
||||||
html_br = html_soup.new_tag("br")
|
html_br = html_soup.new_tag("br")
|
||||||
html_p = html_soup.new_tag("p")
|
html_p = html_soup.new_tag("p")
|
||||||
|
@ -450,13 +460,3 @@ class TestEncodingConversion(SoupTest):
|
||||||
# The internal data structures can be encoded as UTF-8.
|
# The internal data structures can be encoded as UTF-8.
|
||||||
soup_from_unicode = self.soup(self.unicode_data)
|
soup_from_unicode = self.soup(self.unicode_data)
|
||||||
assert soup_from_unicode.encode('utf-8') == self.utf8_data
|
assert soup_from_unicode.encode('utf-8') == self.utf8_data
|
||||||
|
|
||||||
@skipIf(
|
|
||||||
PYTHON_3_PRE_3_2,
|
|
||||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
|
||||||
def test_attribute_name_containing_unicode_characters(self):
|
|
||||||
markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
|
|
||||||
assert self.soup(markup).div.encode("utf8") == markup.encode("utf8")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,6 @@ from bs4.element import (
|
||||||
)
|
)
|
||||||
from . import (
|
from . import (
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
class TestFind(SoupTest):
|
class TestFind(SoupTest):
|
||||||
|
@ -910,12 +909,16 @@ class TestTreeModification(SoupTest):
|
||||||
soup.a.extend(l)
|
soup.a.extend(l)
|
||||||
assert "<a><g></g><f></f><e></e><d></d><c></c><b></b></a>" == soup.decode()
|
assert "<a><g></g><f></f><e></e><d></d><c></c><b></b></a>" == soup.decode()
|
||||||
|
|
||||||
def test_extend_with_another_tags_contents(self):
|
@pytest.mark.parametrize(
|
||||||
|
"get_tags", [lambda tag: tag, lambda tag: tag.contents]
|
||||||
|
)
|
||||||
|
def test_extend_with_another_tags_contents(self, get_tags):
|
||||||
data = '<body><div id="d1"><a>1</a><a>2</a><a>3</a><a>4</a></div><div id="d2"></div></body>'
|
data = '<body><div id="d1"><a>1</a><a>2</a><a>3</a><a>4</a></div><div id="d2"></div></body>'
|
||||||
soup = self.soup(data)
|
soup = self.soup(data)
|
||||||
d1 = soup.find('div', id='d1')
|
d1 = soup.find('div', id='d1')
|
||||||
d2 = soup.find('div', id='d2')
|
d2 = soup.find('div', id='d2')
|
||||||
d2.extend(d1)
|
tags = get_tags(d1)
|
||||||
|
d2.extend(tags)
|
||||||
assert '<div id="d1"></div>' == d1.decode()
|
assert '<div id="d1"></div>' == d1.decode()
|
||||||
assert '<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>' == d2.decode()
|
assert '<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>' == d2.decode()
|
||||||
|
|
||||||
|
@ -1272,19 +1275,30 @@ class TestTreeModification(SoupTest):
|
||||||
|
|
||||||
class TestDeprecatedArguments(SoupTest):
|
class TestDeprecatedArguments(SoupTest):
|
||||||
|
|
||||||
def test_find_type_method_string(self):
|
@pytest.mark.parametrize(
|
||||||
|
"method_name", [
|
||||||
|
"find", "find_all", "find_parent", "find_parents",
|
||||||
|
"find_next", "find_all_next", "find_previous",
|
||||||
|
"find_all_previous", "find_next_sibling", "find_next_siblings",
|
||||||
|
"find_previous_sibling", "find_previous_siblings",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_find_type_method_string(self, method_name):
|
||||||
soup = self.soup("<a>some</a><b>markup</b>")
|
soup = self.soup("<a>some</a><b>markup</b>")
|
||||||
|
method = getattr(soup.b, method_name)
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
[result] = soup.find_all(text='markup')
|
method(text='markup')
|
||||||
assert result == 'markup'
|
[warning] = w
|
||||||
assert result.parent.name == 'b'
|
assert warning.filename == __file__
|
||||||
msg = str(w[0].message)
|
msg = str(warning.message)
|
||||||
assert msg == "The 'text' argument to find()-type methods is deprecated. Use 'string' instead."
|
assert msg == "The 'text' argument to find()-type methods is deprecated. Use 'string' instead."
|
||||||
|
|
||||||
def test_soupstrainer_constructor_string(self):
|
def test_soupstrainer_constructor_string(self):
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
strainer = SoupStrainer(text="text")
|
strainer = SoupStrainer(text="text")
|
||||||
assert strainer.text == 'text'
|
assert strainer.text == 'text'
|
||||||
msg = str(w[0].message)
|
[warning] = w
|
||||||
|
msg = str(warning.message)
|
||||||
|
assert warning.filename == __file__
|
||||||
assert msg == "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead."
|
assert msg == "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead."
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||||
from . import css_parser as cp
|
from . import css_parser as cp
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||||
import bs4 # type: ignore[import]
|
import bs4 # type: ignore[import]
|
||||||
from typing import Dict, Optional, Any, List, Iterator, Iterable
|
from typing import Optional, Any, Iterator, Iterable
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
||||||
|
@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve
|
||||||
|
|
||||||
def compile( # noqa: A001
|
def compile( # noqa: A001
|
||||||
pattern: str,
|
pattern: str,
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> cm.SoupSieve:
|
) -> cm.SoupSieve:
|
||||||
"""Compile CSS pattern."""
|
"""Compile CSS pattern."""
|
||||||
|
|
||||||
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
|
|
||||||
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
|
|
||||||
|
|
||||||
if isinstance(pattern, SoupSieve):
|
if isinstance(pattern, SoupSieve):
|
||||||
if flags:
|
if flags:
|
||||||
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
|
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
|
||||||
|
@ -64,7 +62,12 @@ def compile( # noqa: A001
|
||||||
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
||||||
return pattern
|
return pattern
|
||||||
|
|
||||||
return cp._cached_css_compile(pattern, ns, cs, flags)
|
return cp._cached_css_compile(
|
||||||
|
pattern,
|
||||||
|
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
|
||||||
|
ct.CustomSelectors(custom) if custom is not None else custom,
|
||||||
|
flags
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def purge() -> None:
|
def purge() -> None:
|
||||||
|
@ -76,10 +79,10 @@ def purge() -> None:
|
||||||
def closest(
|
def closest(
|
||||||
select: str,
|
select: str,
|
||||||
tag: 'bs4.Tag',
|
tag: 'bs4.Tag',
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> 'bs4.Tag':
|
) -> 'bs4.Tag':
|
||||||
"""Match closest ancestor."""
|
"""Match closest ancestor."""
|
||||||
|
@ -90,10 +93,10 @@ def closest(
|
||||||
def match(
|
def match(
|
||||||
select: str,
|
select: str,
|
||||||
tag: 'bs4.Tag',
|
tag: 'bs4.Tag',
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Match node."""
|
"""Match node."""
|
||||||
|
@ -104,12 +107,12 @@ def match(
|
||||||
def filter( # noqa: A001
|
def filter( # noqa: A001
|
||||||
select: str,
|
select: str,
|
||||||
iterable: Iterable['bs4.Tag'],
|
iterable: Iterable['bs4.Tag'],
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> List['bs4.Tag']:
|
) -> list['bs4.Tag']:
|
||||||
"""Filter list of nodes."""
|
"""Filter list of nodes."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||||
|
@ -118,10 +121,10 @@ def filter( # noqa: A001
|
||||||
def select_one(
|
def select_one(
|
||||||
select: str,
|
select: str,
|
||||||
tag: 'bs4.Tag',
|
tag: 'bs4.Tag',
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> 'bs4.Tag':
|
) -> 'bs4.Tag':
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
@ -132,13 +135,13 @@ def select_one(
|
||||||
def select(
|
def select(
|
||||||
select: str,
|
select: str,
|
||||||
tag: 'bs4.Tag',
|
tag: 'bs4.Tag',
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
limit: int = 0,
|
limit: int = 0,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> List['bs4.Tag']:
|
) -> list['bs4.Tag']:
|
||||||
"""Select the specified tags."""
|
"""Select the specified tags."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
||||||
|
@ -147,11 +150,11 @@ def select(
|
||||||
def iselect(
|
def iselect(
|
||||||
select: str,
|
select: str,
|
||||||
tag: 'bs4.Tag',
|
tag: 'bs4.Tag',
|
||||||
namespaces: Optional[Dict[str, str]] = None,
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
limit: int = 0,
|
limit: int = 0,
|
||||||
flags: int = 0,
|
flags: int = 0,
|
||||||
*,
|
*,
|
||||||
custom: Optional[Dict[str, str]] = None,
|
custom: Optional[dict[str, str]] = None,
|
||||||
**kwargs: Any
|
**kwargs: Any
|
||||||
) -> Iterator['bs4.Tag']:
|
) -> Iterator['bs4.Tag']:
|
||||||
"""Iterate the specified tags."""
|
"""Iterate the specified tags."""
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
"""Meta related things."""
|
"""Meta related things."""
|
||||||
|
from __future__ import annotations
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
||||||
cls,
|
cls,
|
||||||
major: int, minor: int, micro: int, release: str = "final",
|
major: int, minor: int, micro: int, release: str = "final",
|
||||||
pre: int = 0, post: int = 0, dev: int = 0
|
pre: int = 0, post: int = 0, dev: int = 0
|
||||||
) -> "Version":
|
) -> Version:
|
||||||
"""Validate version info."""
|
"""Validate version info."""
|
||||||
|
|
||||||
# Ensure all parts are positive integers.
|
# Ensure all parts are positive integers.
|
||||||
|
@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version:
|
||||||
return Version(major, minor, micro, release, pre, post, dev)
|
return Version(major, minor, micro, release, pre, post, dev)
|
||||||
|
|
||||||
|
|
||||||
__version_info__ = Version(2, 3, 2, "final", post=1)
|
__version_info__ = Version(2, 4, 0, "final")
|
||||||
__version__ = __version_info__._get_canonical()
|
__version__ = __version_info__._get_canonical()
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
"""CSS matcher."""
|
"""CSS matcher."""
|
||||||
|
from __future__ import annotations
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from . import util
|
from . import util
|
||||||
import re
|
import re
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import bs4 # type: ignore[import]
|
import bs4 # type: ignore[import]
|
||||||
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
|
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
|
||||||
|
|
||||||
# Empty tag pattern (whitespace okay)
|
# Empty tag pattern (whitespace okay)
|
||||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||||
|
@ -64,12 +65,12 @@ class _FakeParent:
|
||||||
fake parent so we can traverse the root element as a child.
|
fake parent so we can traverse the root element as a child.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, element: 'bs4.Tag') -> None:
|
def __init__(self, element: bs4.Tag) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.contents = [element]
|
self.contents = [element]
|
||||||
|
|
||||||
def __len__(self) -> 'bs4.PageElement':
|
def __len__(self) -> bs4.PageElement:
|
||||||
"""Length."""
|
"""Length."""
|
||||||
|
|
||||||
return len(self.contents)
|
return len(self.contents)
|
||||||
|
@ -87,59 +88,59 @@ class _DocumentNav:
|
||||||
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
|
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_doc(obj: 'bs4.Tag') -> bool:
|
def is_doc(obj: bs4.Tag) -> bool:
|
||||||
"""Is `BeautifulSoup` object."""
|
"""Is `BeautifulSoup` object."""
|
||||||
return isinstance(obj, bs4.BeautifulSoup)
|
return isinstance(obj, bs4.BeautifulSoup)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_tag(obj: 'bs4.PageElement') -> bool:
|
def is_tag(obj: bs4.PageElement) -> bool:
|
||||||
"""Is tag."""
|
"""Is tag."""
|
||||||
return isinstance(obj, bs4.Tag)
|
return isinstance(obj, bs4.Tag)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
|
||||||
"""Is declaration."""
|
"""Is declaration."""
|
||||||
return isinstance(obj, bs4.Declaration)
|
return isinstance(obj, bs4.Declaration)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_cdata(obj: 'bs4.PageElement') -> bool:
|
def is_cdata(obj: bs4.PageElement) -> bool:
|
||||||
"""Is CDATA."""
|
"""Is CDATA."""
|
||||||
return isinstance(obj, bs4.CData)
|
return isinstance(obj, bs4.CData)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
|
||||||
"""Is processing instruction."""
|
"""Is processing instruction."""
|
||||||
return isinstance(obj, bs4.ProcessingInstruction)
|
return isinstance(obj, bs4.ProcessingInstruction)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
|
def is_navigable_string(obj: bs4.PageElement) -> bool:
|
||||||
"""Is navigable string."""
|
"""Is navigable string."""
|
||||||
return isinstance(obj, bs4.NavigableString)
|
return isinstance(obj, bs4.NavigableString)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_special_string(obj: 'bs4.PageElement') -> bool:
|
def is_special_string(obj: bs4.PageElement) -> bool:
|
||||||
"""Is special string."""
|
"""Is special string."""
|
||||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
|
def is_content_string(cls, obj: bs4.PageElement) -> bool:
|
||||||
"""Check if node is content string."""
|
"""Check if node is content string."""
|
||||||
|
|
||||||
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
|
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
|
def create_fake_parent(el: bs4.Tag) -> _FakeParent:
|
||||||
"""Create fake parent for a given element."""
|
"""Create fake parent for a given element."""
|
||||||
|
|
||||||
return _FakeParent(el)
|
return _FakeParent(el)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_xml_tree(el: 'bs4.Tag') -> bool:
|
def is_xml_tree(el: bs4.Tag) -> bool:
|
||||||
"""Check if element (or document) is from a XML tree."""
|
"""Check if element (or document) is from a XML tree."""
|
||||||
|
|
||||||
return bool(el._is_xml)
|
return bool(el._is_xml)
|
||||||
|
|
||||||
def is_iframe(self, el: 'bs4.Tag') -> bool:
|
def is_iframe(self, el: bs4.Tag) -> bool:
|
||||||
"""Check if element is an `iframe`."""
|
"""Check if element is an `iframe`."""
|
||||||
|
|
||||||
return bool(
|
return bool(
|
||||||
|
@ -147,7 +148,7 @@ class _DocumentNav:
|
||||||
self.is_html_tag(el) # type: ignore[attr-defined]
|
self.is_html_tag(el) # type: ignore[attr-defined]
|
||||||
)
|
)
|
||||||
|
|
||||||
def is_root(self, el: 'bs4.Tag') -> bool:
|
def is_root(self, el: bs4.Tag) -> bool:
|
||||||
"""
|
"""
|
||||||
Return whether element is a root element.
|
Return whether element is a root element.
|
||||||
|
|
||||||
|
@ -161,7 +162,7 @@ class _DocumentNav:
|
||||||
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
|
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
|
||||||
return root
|
return root
|
||||||
|
|
||||||
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
|
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
|
||||||
"""Get contents or contents in reverse."""
|
"""Get contents or contents in reverse."""
|
||||||
if not no_iframe or not self.is_iframe(el):
|
if not no_iframe or not self.is_iframe(el):
|
||||||
for content in el.contents:
|
for content in el.contents:
|
||||||
|
@ -169,12 +170,12 @@ class _DocumentNav:
|
||||||
|
|
||||||
def get_children(
|
def get_children(
|
||||||
self,
|
self,
|
||||||
el: 'bs4.Tag',
|
el: bs4.Tag,
|
||||||
start: Optional[int] = None,
|
start: Optional[int] = None,
|
||||||
reverse: bool = False,
|
reverse: bool = False,
|
||||||
tags: bool = True,
|
tags: bool = True,
|
||||||
no_iframe: bool = False
|
no_iframe: bool = False
|
||||||
) -> Iterator['bs4.PageElement']:
|
) -> Iterator[bs4.PageElement]:
|
||||||
"""Get children."""
|
"""Get children."""
|
||||||
|
|
||||||
if not no_iframe or not self.is_iframe(el):
|
if not no_iframe or not self.is_iframe(el):
|
||||||
|
@ -195,10 +196,10 @@ class _DocumentNav:
|
||||||
|
|
||||||
def get_descendants(
|
def get_descendants(
|
||||||
self,
|
self,
|
||||||
el: 'bs4.Tag',
|
el: bs4.Tag,
|
||||||
tags: bool = True,
|
tags: bool = True,
|
||||||
no_iframe: bool = False
|
no_iframe: bool = False
|
||||||
) -> Iterator['bs4.PageElement']:
|
) -> Iterator[bs4.PageElement]:
|
||||||
"""Get descendants."""
|
"""Get descendants."""
|
||||||
|
|
||||||
if not no_iframe or not self.is_iframe(el):
|
if not no_iframe or not self.is_iframe(el):
|
||||||
|
@ -229,7 +230,7 @@ class _DocumentNav:
|
||||||
if not tags or is_tag:
|
if not tags or is_tag:
|
||||||
yield child
|
yield child
|
||||||
|
|
||||||
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
|
def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
|
||||||
"""Get parent."""
|
"""Get parent."""
|
||||||
|
|
||||||
parent = el.parent
|
parent = el.parent
|
||||||
|
@ -238,25 +239,25 @@ class _DocumentNav:
|
||||||
return parent
|
return parent
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
|
def get_tag_name(el: bs4.Tag) -> Optional[str]:
|
||||||
"""Get tag."""
|
"""Get tag."""
|
||||||
|
|
||||||
return cast(Optional[str], el.name)
|
return cast(Optional[str], el.name)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
|
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
|
||||||
"""Get prefix."""
|
"""Get prefix."""
|
||||||
|
|
||||||
return cast(Optional[str], el.prefix)
|
return cast(Optional[str], el.prefix)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_uri(el: 'bs4.Tag') -> Optional[str]:
|
def get_uri(el: bs4.Tag) -> Optional[str]:
|
||||||
"""Get namespace `URI`."""
|
"""Get namespace `URI`."""
|
||||||
|
|
||||||
return cast(Optional[str], el.namespace)
|
return cast(Optional[str], el.namespace)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
|
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
|
||||||
"""Get next sibling tag."""
|
"""Get next sibling tag."""
|
||||||
|
|
||||||
sibling = el.next_sibling
|
sibling = el.next_sibling
|
||||||
|
@ -265,7 +266,7 @@ class _DocumentNav:
|
||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
|
def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
|
||||||
"""Get previous sibling tag."""
|
"""Get previous sibling tag."""
|
||||||
|
|
||||||
sibling = el.previous_sibling
|
sibling = el.previous_sibling
|
||||||
|
@ -274,7 +275,7 @@ class _DocumentNav:
|
||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def has_html_ns(el: 'bs4.Tag') -> bool:
|
def has_html_ns(el: bs4.Tag) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if element has an HTML namespace.
|
Check if element has an HTML namespace.
|
||||||
|
|
||||||
|
@ -286,13 +287,13 @@ class _DocumentNav:
|
||||||
return bool(ns and ns == NS_XHTML)
|
return bool(ns and ns == NS_XHTML)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
|
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
|
||||||
"""Return namespace and attribute name without the prefix."""
|
"""Return namespace and attribute name without the prefix."""
|
||||||
|
|
||||||
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
|
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
|
def normalize_value(cls, value: Any) -> str | Sequence[str]:
|
||||||
"""Normalize the value to be a string or list of strings."""
|
"""Normalize the value to be a string or list of strings."""
|
||||||
|
|
||||||
# Treat `None` as empty string.
|
# Treat `None` as empty string.
|
||||||
|
@ -327,10 +328,10 @@ class _DocumentNav:
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_attribute_by_name(
|
def get_attribute_by_name(
|
||||||
cls,
|
cls,
|
||||||
el: 'bs4.Tag',
|
el: bs4.Tag,
|
||||||
name: str,
|
name: str,
|
||||||
default: Optional[Union[str, Sequence[str]]] = None
|
default: Optional[str | Sequence[str]] = None
|
||||||
) -> Optional[Union[str, Sequence[str]]]:
|
) -> Optional[str | Sequence[str]]:
|
||||||
"""Get attribute by name."""
|
"""Get attribute by name."""
|
||||||
|
|
||||||
value = default
|
value = default
|
||||||
|
@ -347,14 +348,14 @@ class _DocumentNav:
|
||||||
return value
|
return value
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
|
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
|
||||||
"""Iterate attributes."""
|
"""Iterate attributes."""
|
||||||
|
|
||||||
for k, v in el.attrs.items():
|
for k, v in el.attrs.items():
|
||||||
yield k, cls.normalize_value(v)
|
yield k, cls.normalize_value(v)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
|
def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
|
||||||
"""Get classes."""
|
"""Get classes."""
|
||||||
|
|
||||||
classes = cls.get_attribute_by_name(el, 'class', [])
|
classes = cls.get_attribute_by_name(el, 'class', [])
|
||||||
|
@ -362,14 +363,14 @@ class _DocumentNav:
|
||||||
classes = RE_NOT_WS.findall(classes)
|
classes = RE_NOT_WS.findall(classes)
|
||||||
return cast(Sequence[str], classes)
|
return cast(Sequence[str], classes)
|
||||||
|
|
||||||
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
|
def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
|
||||||
"""Get text."""
|
"""Get text."""
|
||||||
|
|
||||||
return ''.join(
|
return ''.join(
|
||||||
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
|
def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
|
||||||
"""Get Own Text."""
|
"""Get Own Text."""
|
||||||
|
|
||||||
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
|
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||||
|
@ -423,10 +424,10 @@ class Inputs:
|
||||||
return 0 <= minutes <= 59
|
return 0 <= minutes <= 59
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
|
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
|
||||||
"""Parse the input value."""
|
"""Parse the input value."""
|
||||||
|
|
||||||
parsed = None # type: Optional[Tuple[float, ...]]
|
parsed = None # type: Optional[tuple[float, ...]]
|
||||||
if value is None:
|
if value is None:
|
||||||
return value
|
return value
|
||||||
if itype == "date":
|
if itype == "date":
|
||||||
|
@ -484,7 +485,7 @@ class CSSMatch(_DocumentNav):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
selectors: ct.SelectorList,
|
selectors: ct.SelectorList,
|
||||||
scope: 'bs4.Tag',
|
scope: bs4.Tag,
|
||||||
namespaces: Optional[ct.Namespaces],
|
namespaces: Optional[ct.Namespaces],
|
||||||
flags: int
|
flags: int
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -492,11 +493,11 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
self.assert_valid_input(scope)
|
self.assert_valid_input(scope)
|
||||||
self.tag = scope
|
self.tag = scope
|
||||||
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
|
self.cached_meta_lang = [] # type: list[tuple[str, str]]
|
||||||
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
|
self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
|
||||||
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
|
self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
|
||||||
self.selectors = selectors
|
self.selectors = selectors
|
||||||
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
|
self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
|
||||||
self.flags = flags
|
self.flags = flags
|
||||||
self.iframe_restrict = False
|
self.iframe_restrict = False
|
||||||
|
|
||||||
|
@ -527,7 +528,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return self.is_xml or self.has_html_namespace
|
return self.is_xml or self.has_html_namespace
|
||||||
|
|
||||||
def get_tag_ns(self, el: 'bs4.Tag') -> str:
|
def get_tag_ns(self, el: bs4.Tag) -> str:
|
||||||
"""Get tag namespace."""
|
"""Get tag namespace."""
|
||||||
|
|
||||||
if self.supports_namespaces():
|
if self.supports_namespaces():
|
||||||
|
@ -539,24 +540,24 @@ class CSSMatch(_DocumentNav):
|
||||||
namespace = NS_XHTML
|
namespace = NS_XHTML
|
||||||
return namespace
|
return namespace
|
||||||
|
|
||||||
def is_html_tag(self, el: 'bs4.Tag') -> bool:
|
def is_html_tag(self, el: bs4.Tag) -> bool:
|
||||||
"""Check if tag is in HTML namespace."""
|
"""Check if tag is in HTML namespace."""
|
||||||
|
|
||||||
return self.get_tag_ns(el) == NS_XHTML
|
return self.get_tag_ns(el) == NS_XHTML
|
||||||
|
|
||||||
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
|
def get_tag(self, el: bs4.Tag) -> Optional[str]:
|
||||||
"""Get tag."""
|
"""Get tag."""
|
||||||
|
|
||||||
name = self.get_tag_name(el)
|
name = self.get_tag_name(el)
|
||||||
return util.lower(name) if name is not None and not self.is_xml else name
|
return util.lower(name) if name is not None and not self.is_xml else name
|
||||||
|
|
||||||
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
|
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
|
||||||
"""Get prefix."""
|
"""Get prefix."""
|
||||||
|
|
||||||
prefix = self.get_prefix_name(el)
|
prefix = self.get_prefix_name(el)
|
||||||
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
|
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
|
||||||
|
|
||||||
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
|
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
|
||||||
"""Get directionality from element text."""
|
"""Get directionality from element text."""
|
||||||
|
|
||||||
for node in self.get_children(el, tags=False):
|
for node in self.get_children(el, tags=False):
|
||||||
|
@ -600,13 +601,18 @@ class CSSMatch(_DocumentNav):
|
||||||
ranges = lang_range.split('-')
|
ranges = lang_range.split('-')
|
||||||
subtags = lang_tag.lower().split('-')
|
subtags = lang_tag.lower().split('-')
|
||||||
length = len(ranges)
|
length = len(ranges)
|
||||||
|
slength = len(subtags)
|
||||||
rindex = 0
|
rindex = 0
|
||||||
sindex = 0
|
sindex = 0
|
||||||
r = ranges[rindex]
|
r = ranges[rindex]
|
||||||
s = subtags[sindex]
|
s = subtags[sindex]
|
||||||
|
|
||||||
|
# Empty specified language should match unspecified language attributes
|
||||||
|
if length == 1 and slength == 1 and not r and r == s:
|
||||||
|
return True
|
||||||
|
|
||||||
# Primary tag needs to match
|
# Primary tag needs to match
|
||||||
if r != '*' and r != s:
|
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
|
||||||
match = False
|
match = False
|
||||||
|
|
||||||
rindex += 1
|
rindex += 1
|
||||||
|
@ -645,10 +651,10 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
def match_attribute_name(
|
def match_attribute_name(
|
||||||
self,
|
self,
|
||||||
el: 'bs4.Tag',
|
el: bs4.Tag,
|
||||||
attr: str,
|
attr: str,
|
||||||
prefix: Optional[str]
|
prefix: Optional[str]
|
||||||
) -> Optional[Union[str, Sequence[str]]]:
|
) -> Optional[str | Sequence[str]]:
|
||||||
"""Match attribute name and return value if it exists."""
|
"""Match attribute name and return value if it exists."""
|
||||||
|
|
||||||
value = None
|
value = None
|
||||||
|
@ -696,7 +702,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
|
def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
|
||||||
"""Match the namespace of the element."""
|
"""Match the namespace of the element."""
|
||||||
|
|
||||||
match = True
|
match = True
|
||||||
|
@ -717,7 +723,7 @@ class CSSMatch(_DocumentNav):
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
|
def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
|
||||||
"""Match attributes."""
|
"""Match attributes."""
|
||||||
|
|
||||||
match = True
|
match = True
|
||||||
|
@ -736,7 +742,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
|
def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
|
||||||
"""Match tag name."""
|
"""Match tag name."""
|
||||||
|
|
||||||
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
|
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
|
||||||
|
@ -745,7 +751,7 @@ class CSSMatch(_DocumentNav):
|
||||||
name not in (self.get_tag(el), '*')
|
name not in (self.get_tag(el), '*')
|
||||||
)
|
)
|
||||||
|
|
||||||
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
|
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
|
||||||
"""Match the tag."""
|
"""Match the tag."""
|
||||||
|
|
||||||
match = True
|
match = True
|
||||||
|
@ -757,7 +763,7 @@ class CSSMatch(_DocumentNav):
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
|
||||||
"""Match past relationship."""
|
"""Match past relationship."""
|
||||||
|
|
||||||
found = False
|
found = False
|
||||||
|
@ -785,12 +791,12 @@ class CSSMatch(_DocumentNav):
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
return found
|
return found
|
||||||
|
|
||||||
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
|
def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
|
||||||
"""Match future child."""
|
"""Match future child."""
|
||||||
|
|
||||||
match = False
|
match = False
|
||||||
if recursive:
|
if recursive:
|
||||||
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
|
children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
|
||||||
else:
|
else:
|
||||||
children = self.get_children
|
children = self.get_children
|
||||||
for child in children(parent, no_iframe=self.iframe_restrict):
|
for child in children(parent, no_iframe=self.iframe_restrict):
|
||||||
|
@ -799,7 +805,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
|
||||||
"""Match future relationship."""
|
"""Match future relationship."""
|
||||||
|
|
||||||
found = False
|
found = False
|
||||||
|
@ -822,7 +828,7 @@ class CSSMatch(_DocumentNav):
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
return found
|
return found
|
||||||
|
|
||||||
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
|
||||||
"""Match relationship to other elements."""
|
"""Match relationship to other elements."""
|
||||||
|
|
||||||
found = False
|
found = False
|
||||||
|
@ -837,7 +843,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return found
|
return found
|
||||||
|
|
||||||
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
|
def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
|
||||||
"""Match element's ID."""
|
"""Match element's ID."""
|
||||||
|
|
||||||
found = True
|
found = True
|
||||||
|
@ -847,7 +853,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return found
|
return found
|
||||||
|
|
||||||
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
|
def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
|
||||||
"""Match element's classes."""
|
"""Match element's classes."""
|
||||||
|
|
||||||
current_classes = self.get_classes(el)
|
current_classes = self.get_classes(el)
|
||||||
|
@ -858,7 +864,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return found
|
return found
|
||||||
|
|
||||||
def match_root(self, el: 'bs4.Tag') -> bool:
|
def match_root(self, el: bs4.Tag) -> bool:
|
||||||
"""Match element as root."""
|
"""Match element as root."""
|
||||||
|
|
||||||
is_root = self.is_root(el)
|
is_root = self.is_root(el)
|
||||||
|
@ -884,20 +890,20 @@ class CSSMatch(_DocumentNav):
|
||||||
sibling = self.get_next(sibling, tags=False)
|
sibling = self.get_next(sibling, tags=False)
|
||||||
return is_root
|
return is_root
|
||||||
|
|
||||||
def match_scope(self, el: 'bs4.Tag') -> bool:
|
def match_scope(self, el: bs4.Tag) -> bool:
|
||||||
"""Match element as scope."""
|
"""Match element as scope."""
|
||||||
|
|
||||||
return self.scope is el
|
return self.scope is el
|
||||||
|
|
||||||
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
|
def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
|
||||||
"""Match tag type for `nth` matches."""
|
"""Match tag type for `nth` matches."""
|
||||||
|
|
||||||
return(
|
return (
|
||||||
(self.get_tag(child) == self.get_tag(el)) and
|
(self.get_tag(child) == self.get_tag(el)) and
|
||||||
(self.get_tag_ns(child) == self.get_tag_ns(el))
|
(self.get_tag_ns(child) == self.get_tag_ns(el))
|
||||||
)
|
)
|
||||||
|
|
||||||
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
|
def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
|
||||||
"""Match `nth` elements."""
|
"""Match `nth` elements."""
|
||||||
|
|
||||||
matched = True
|
matched = True
|
||||||
|
@ -998,7 +1004,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return matched
|
return matched
|
||||||
|
|
||||||
def match_empty(self, el: 'bs4.Tag') -> bool:
|
def match_empty(self, el: bs4.Tag) -> bool:
|
||||||
"""Check if element is empty (if requested)."""
|
"""Check if element is empty (if requested)."""
|
||||||
|
|
||||||
is_empty = True
|
is_empty = True
|
||||||
|
@ -1011,7 +1017,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return is_empty
|
return is_empty
|
||||||
|
|
||||||
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
|
def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
|
||||||
"""Match selectors."""
|
"""Match selectors."""
|
||||||
|
|
||||||
match = True
|
match = True
|
||||||
|
@ -1020,11 +1026,11 @@ class CSSMatch(_DocumentNav):
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
|
def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
|
||||||
"""Match element if it contains text."""
|
"""Match element if it contains text."""
|
||||||
|
|
||||||
match = True
|
match = True
|
||||||
content = None # type: Optional[Union[str, Sequence[str]]]
|
content = None # type: Optional[str | Sequence[str]]
|
||||||
for contain_list in contains:
|
for contain_list in contains:
|
||||||
if content is None:
|
if content is None:
|
||||||
if contain_list.own:
|
if contain_list.own:
|
||||||
|
@ -1048,7 +1054,7 @@ class CSSMatch(_DocumentNav):
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_default(self, el: 'bs4.Tag') -> bool:
|
def match_default(self, el: bs4.Tag) -> bool:
|
||||||
"""Match default."""
|
"""Match default."""
|
||||||
|
|
||||||
match = False
|
match = False
|
||||||
|
@ -1087,13 +1093,13 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
|
def match_indeterminate(self, el: bs4.Tag) -> bool:
|
||||||
"""Match default."""
|
"""Match default."""
|
||||||
|
|
||||||
match = False
|
match = False
|
||||||
name = cast(str, self.get_attribute_by_name(el, 'name'))
|
name = cast(str, self.get_attribute_by_name(el, 'name'))
|
||||||
|
|
||||||
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
|
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
|
||||||
"""Find this input's form."""
|
"""Find this input's form."""
|
||||||
form = None
|
form = None
|
||||||
parent = self.get_parent(el, no_iframe=True)
|
parent = self.get_parent(el, no_iframe=True)
|
||||||
|
@ -1148,7 +1154,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
|
def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
|
||||||
"""Match languages."""
|
"""Match languages."""
|
||||||
|
|
||||||
match = False
|
match = False
|
||||||
|
@ -1183,7 +1189,7 @@ class CSSMatch(_DocumentNav):
|
||||||
break
|
break
|
||||||
|
|
||||||
# Use cached meta language.
|
# Use cached meta language.
|
||||||
if not found_lang and self.cached_meta_lang:
|
if found_lang is None and self.cached_meta_lang:
|
||||||
for cache in self.cached_meta_lang:
|
for cache in self.cached_meta_lang:
|
||||||
if root is cache[0]:
|
if root is cache[0]:
|
||||||
found_lang = cache[1]
|
found_lang = cache[1]
|
||||||
|
@ -1217,13 +1223,13 @@ class CSSMatch(_DocumentNav):
|
||||||
found_lang = content
|
found_lang = content
|
||||||
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
|
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
|
||||||
break
|
break
|
||||||
if found_lang:
|
if found_lang is not None:
|
||||||
break
|
break
|
||||||
if not found_lang:
|
if found_lang is None:
|
||||||
self.cached_meta_lang.append((cast(str, root), ''))
|
self.cached_meta_lang.append((cast(str, root), ''))
|
||||||
|
|
||||||
# If we determined a language, compare.
|
# If we determined a language, compare.
|
||||||
if found_lang:
|
if found_lang is not None:
|
||||||
for patterns in langs:
|
for patterns in langs:
|
||||||
match = False
|
match = False
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
|
@ -1234,7 +1240,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
|
def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
|
||||||
"""Check directionality."""
|
"""Check directionality."""
|
||||||
|
|
||||||
# If we have to match both left and right, we can't match either.
|
# If we have to match both left and right, we can't match either.
|
||||||
|
@ -1297,7 +1303,7 @@ class CSSMatch(_DocumentNav):
|
||||||
# Match parents direction
|
# Match parents direction
|
||||||
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
|
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
|
||||||
|
|
||||||
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
|
def match_range(self, el: bs4.Tag, condition: int) -> bool:
|
||||||
"""
|
"""
|
||||||
Match range.
|
Match range.
|
||||||
|
|
||||||
|
@ -1337,7 +1343,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
|
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
|
||||||
|
|
||||||
def match_defined(self, el: 'bs4.Tag') -> bool:
|
def match_defined(self, el: bs4.Tag) -> bool:
|
||||||
"""
|
"""
|
||||||
Match defined.
|
Match defined.
|
||||||
|
|
||||||
|
@ -1360,7 +1366,7 @@ class CSSMatch(_DocumentNav):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
|
def match_placeholder_shown(self, el: bs4.Tag) -> bool:
|
||||||
"""
|
"""
|
||||||
Match placeholder shown according to HTML spec.
|
Match placeholder shown according to HTML spec.
|
||||||
|
|
||||||
|
@ -1375,7 +1381,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
|
def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
|
||||||
"""Check if element matches one of the selectors."""
|
"""Check if element matches one of the selectors."""
|
||||||
|
|
||||||
match = False
|
match = False
|
||||||
|
@ -1459,7 +1465,7 @@ class CSSMatch(_DocumentNav):
|
||||||
|
|
||||||
return match
|
return match
|
||||||
|
|
||||||
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
|
def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
|
||||||
"""Match all tags under the targeted tag."""
|
"""Match all tags under the targeted tag."""
|
||||||
|
|
||||||
lim = None if limit < 1 else limit
|
lim = None if limit < 1 else limit
|
||||||
|
@ -1472,7 +1478,7 @@ class CSSMatch(_DocumentNav):
|
||||||
if lim < 1:
|
if lim < 1:
|
||||||
break
|
break
|
||||||
|
|
||||||
def closest(self) -> Optional['bs4.Tag']:
|
def closest(self) -> Optional[bs4.Tag]:
|
||||||
"""Match closest ancestor."""
|
"""Match closest ancestor."""
|
||||||
|
|
||||||
current = self.tag
|
current = self.tag
|
||||||
|
@ -1484,12 +1490,12 @@ class CSSMatch(_DocumentNav):
|
||||||
current = self.get_parent(current)
|
current = self.get_parent(current)
|
||||||
return closest
|
return closest
|
||||||
|
|
||||||
def filter(self) -> List['bs4.Tag']: # noqa A001
|
def filter(self) -> list[bs4.Tag]: # noqa A001
|
||||||
"""Filter tag's children."""
|
"""Filter tag's children."""
|
||||||
|
|
||||||
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
|
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
|
||||||
|
|
||||||
def match(self, el: 'bs4.Tag') -> bool:
|
def match(self, el: bs4.Tag) -> bool:
|
||||||
"""Match."""
|
"""Match."""
|
||||||
|
|
||||||
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
|
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
|
||||||
|
@ -1501,7 +1507,7 @@ class SoupSieve(ct.Immutable):
|
||||||
pattern: str
|
pattern: str
|
||||||
selectors: ct.SelectorList
|
selectors: ct.SelectorList
|
||||||
namespaces: Optional[ct.Namespaces]
|
namespaces: Optional[ct.Namespaces]
|
||||||
custom: Dict[str, str]
|
custom: dict[str, str]
|
||||||
flags: int
|
flags: int
|
||||||
|
|
||||||
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
|
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
|
||||||
|
@ -1524,17 +1530,17 @@ class SoupSieve(ct.Immutable):
|
||||||
flags=flags
|
flags=flags
|
||||||
)
|
)
|
||||||
|
|
||||||
def match(self, tag: 'bs4.Tag') -> bool:
|
def match(self, tag: bs4.Tag) -> bool:
|
||||||
"""Match."""
|
"""Match."""
|
||||||
|
|
||||||
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
|
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
|
||||||
|
|
||||||
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
|
def closest(self, tag: bs4.Tag) -> bs4.Tag:
|
||||||
"""Match closest ancestor."""
|
"""Match closest ancestor."""
|
||||||
|
|
||||||
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
|
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
|
||||||
|
|
||||||
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
|
def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
|
||||||
"""
|
"""
|
||||||
Filter.
|
Filter.
|
||||||
|
|
||||||
|
@ -1551,18 +1557,18 @@ class SoupSieve(ct.Immutable):
|
||||||
else:
|
else:
|
||||||
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
||||||
|
|
||||||
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
|
def select_one(self, tag: bs4.Tag) -> bs4.Tag:
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
tags = self.select(tag, limit=1)
|
tags = self.select(tag, limit=1)
|
||||||
return tags[0] if tags else None
|
return tags[0] if tags else None
|
||||||
|
|
||||||
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
|
def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
|
||||||
"""Select the specified tags."""
|
"""Select the specified tags."""
|
||||||
|
|
||||||
return list(self.iselect(tag, limit))
|
return list(self.iselect(tag, limit))
|
||||||
|
|
||||||
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
|
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
|
||||||
"""Iterate the specified tags."""
|
"""Iterate the specified tags."""
|
||||||
|
|
||||||
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
|
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
"""CSS selector parser."""
|
"""CSS selector parser."""
|
||||||
|
from __future__ import annotations
|
||||||
import re
|
import re
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from . import util
|
from . import util
|
||||||
|
@ -6,7 +7,7 @@ from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import SelectorSyntaxError
|
from .util import SelectorSyntaxError
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast
|
from typing import Optional, Match, Any, Iterator, cast
|
||||||
|
|
||||||
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
||||||
|
|
||||||
|
@ -232,7 +233,7 @@ def _purge_cache() -> None:
|
||||||
_cached_css_compile.cache_clear()
|
_cached_css_compile.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]:
|
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
|
||||||
"""Process custom."""
|
"""Process custom."""
|
||||||
|
|
||||||
custom_selectors = {}
|
custom_selectors = {}
|
||||||
|
@ -325,7 +326,7 @@ class SelectorPattern:
|
||||||
class SpecialPseudoPattern(SelectorPattern):
|
class SpecialPseudoPattern(SelectorPattern):
|
||||||
"""Selector pattern."""
|
"""Selector pattern."""
|
||||||
|
|
||||||
def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None:
|
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.patterns = {}
|
self.patterns = {}
|
||||||
|
@ -372,19 +373,19 @@ class _Selector:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
|
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
|
||||||
self.ids = kwargs.get('ids', []) # type: List[str]
|
self.ids = kwargs.get('ids', []) # type: list[str]
|
||||||
self.classes = kwargs.get('classes', []) # type: List[str]
|
self.classes = kwargs.get('classes', []) # type: list[str]
|
||||||
self.attributes = kwargs.get('attributes', []) # type: List[ct.SelectorAttribute]
|
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
|
||||||
self.nth = kwargs.get('nth', []) # type: List[ct.SelectorNth]
|
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
|
||||||
self.selectors = kwargs.get('selectors', []) # type: List[ct.SelectorList]
|
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
|
||||||
self.relations = kwargs.get('relations', []) # type: List[_Selector]
|
self.relations = kwargs.get('relations', []) # type: list[_Selector]
|
||||||
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
|
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
|
||||||
self.contains = kwargs.get('contains', []) # type: List[ct.SelectorContains]
|
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
|
||||||
self.lang = kwargs.get('lang', []) # type: List[ct.SelectorLang]
|
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
|
||||||
self.flags = kwargs.get('flags', 0) # type: int
|
self.flags = kwargs.get('flags', 0) # type: int
|
||||||
self.no_match = kwargs.get('no_match', False) # type: bool
|
self.no_match = kwargs.get('no_match', False) # type: bool
|
||||||
|
|
||||||
def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList:
|
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
|
||||||
"""Freeze relation."""
|
"""Freeze relation."""
|
||||||
|
|
||||||
if relations:
|
if relations:
|
||||||
|
@ -394,7 +395,7 @@ class _Selector:
|
||||||
else:
|
else:
|
||||||
return ct.SelectorList()
|
return ct.SelectorList()
|
||||||
|
|
||||||
def freeze(self) -> Union[ct.Selector, ct.SelectorNull]:
|
def freeze(self) -> ct.Selector | ct.SelectorNull:
|
||||||
"""Freeze self."""
|
"""Freeze self."""
|
||||||
|
|
||||||
if self.no_match:
|
if self.no_match:
|
||||||
|
@ -461,7 +462,7 @@ class CSSParser:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
selector: str,
|
selector: str,
|
||||||
custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None,
|
custom: Optional[dict[str, str | ct.SelectorList]] = None,
|
||||||
flags: int = 0
|
flags: int = 0
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
@ -583,9 +584,9 @@ class CSSParser:
|
||||||
sel: _Selector,
|
sel: _Selector,
|
||||||
m: Match[str],
|
m: Match[str],
|
||||||
has_selector: bool,
|
has_selector: bool,
|
||||||
iselector: Iterator[Tuple[str, Match[str]]],
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
is_html: bool
|
is_html: bool
|
||||||
) -> Tuple[bool, bool]:
|
) -> tuple[bool, bool]:
|
||||||
"""Parse pseudo class."""
|
"""Parse pseudo class."""
|
||||||
|
|
||||||
complex_pseudo = False
|
complex_pseudo = False
|
||||||
|
@ -678,7 +679,7 @@ class CSSParser:
|
||||||
sel: _Selector,
|
sel: _Selector,
|
||||||
m: Match[str],
|
m: Match[str],
|
||||||
has_selector: bool,
|
has_selector: bool,
|
||||||
iselector: Iterator[Tuple[str, Match[str]]]
|
iselector: Iterator[tuple[str, Match[str]]]
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Parse `nth` pseudo."""
|
"""Parse `nth` pseudo."""
|
||||||
|
|
||||||
|
@ -743,7 +744,7 @@ class CSSParser:
|
||||||
sel: _Selector,
|
sel: _Selector,
|
||||||
name: str,
|
name: str,
|
||||||
has_selector: bool,
|
has_selector: bool,
|
||||||
iselector: Iterator[Tuple[str, Match[str]]],
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
index: int
|
index: int
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Parse pseudo with opening bracket."""
|
"""Parse pseudo with opening bracket."""
|
||||||
|
@ -752,7 +753,7 @@ class CSSParser:
|
||||||
if name == ':not':
|
if name == ':not':
|
||||||
flags |= FLG_NOT
|
flags |= FLG_NOT
|
||||||
elif name == ':has':
|
elif name == ':has':
|
||||||
flags |= FLG_RELATIVE | FLG_FORGIVE
|
flags |= FLG_RELATIVE
|
||||||
elif name in (':where', ':is'):
|
elif name in (':where', ':is'):
|
||||||
flags |= FLG_FORGIVE
|
flags |= FLG_FORGIVE
|
||||||
|
|
||||||
|
@ -766,21 +767,16 @@ class CSSParser:
|
||||||
sel: _Selector,
|
sel: _Selector,
|
||||||
m: Match[str],
|
m: Match[str],
|
||||||
has_selector: bool,
|
has_selector: bool,
|
||||||
selectors: List[_Selector],
|
selectors: list[_Selector],
|
||||||
rel_type: str,
|
rel_type: str,
|
||||||
index: int
|
index: int
|
||||||
) -> Tuple[bool, _Selector, str]:
|
) -> tuple[bool, _Selector, str]:
|
||||||
"""Parse combinator tokens."""
|
"""Parse combinator tokens."""
|
||||||
|
|
||||||
combinator = m.group('relation').strip()
|
combinator = m.group('relation').strip()
|
||||||
if not combinator:
|
if not combinator:
|
||||||
combinator = WS_COMBINATOR
|
combinator = WS_COMBINATOR
|
||||||
if combinator == COMMA_COMBINATOR:
|
if combinator == COMMA_COMBINATOR:
|
||||||
if not has_selector:
|
|
||||||
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
|
|
||||||
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
|
|
||||||
sel.no_match = True
|
|
||||||
|
|
||||||
sel.rel_type = rel_type
|
sel.rel_type = rel_type
|
||||||
selectors[-1].relations.append(sel)
|
selectors[-1].relations.append(sel)
|
||||||
rel_type = ":" + WS_COMBINATOR
|
rel_type = ":" + WS_COMBINATOR
|
||||||
|
@ -814,12 +810,12 @@ class CSSParser:
|
||||||
sel: _Selector,
|
sel: _Selector,
|
||||||
m: Match[str],
|
m: Match[str],
|
||||||
has_selector: bool,
|
has_selector: bool,
|
||||||
selectors: List[_Selector],
|
selectors: list[_Selector],
|
||||||
relations: List[_Selector],
|
relations: list[_Selector],
|
||||||
is_pseudo: bool,
|
is_pseudo: bool,
|
||||||
is_forgive: bool,
|
is_forgive: bool,
|
||||||
index: int
|
index: int
|
||||||
) -> Tuple[bool, _Selector]:
|
) -> tuple[bool, _Selector]:
|
||||||
"""Parse combinator tokens."""
|
"""Parse combinator tokens."""
|
||||||
|
|
||||||
combinator = m.group('relation').strip()
|
combinator = m.group('relation').strip()
|
||||||
|
@ -924,7 +920,7 @@ class CSSParser:
|
||||||
|
|
||||||
def parse_selectors(
|
def parse_selectors(
|
||||||
self,
|
self,
|
||||||
iselector: Iterator[Tuple[str, Match[str]]],
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
index: int = 0,
|
index: int = 0,
|
||||||
flags: int = 0
|
flags: int = 0
|
||||||
) -> ct.SelectorList:
|
) -> ct.SelectorList:
|
||||||
|
@ -935,7 +931,7 @@ class CSSParser:
|
||||||
selectors = []
|
selectors = []
|
||||||
has_selector = False
|
has_selector = False
|
||||||
closed = False
|
closed = False
|
||||||
relations = [] # type: List[_Selector]
|
relations = [] # type: list[_Selector]
|
||||||
rel_type = ":" + WS_COMBINATOR
|
rel_type = ":" + WS_COMBINATOR
|
||||||
|
|
||||||
# Setup various flags
|
# Setup various flags
|
||||||
|
@ -1069,18 +1065,8 @@ class CSSParser:
|
||||||
selectors.append(sel)
|
selectors.append(sel)
|
||||||
|
|
||||||
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
|
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
|
||||||
elif is_forgive:
|
elif is_forgive and (not selectors or not relations):
|
||||||
if is_relative:
|
# Handle normal pseudo-classes with empty slots like `:is()` etc.
|
||||||
# Handle relative selectors pseudo-classes with empty slots like `:has()`
|
|
||||||
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
|
|
||||||
sel.rel_type = rel_type
|
|
||||||
sel.no_match = True
|
|
||||||
selectors[-1].relations.append(sel)
|
|
||||||
has_selector = True
|
|
||||||
else:
|
|
||||||
# Handle normal pseudo-classes with empty slots
|
|
||||||
if not selectors or not relations:
|
|
||||||
# Others like `:is()` etc.
|
|
||||||
sel.no_match = True
|
sel.no_match = True
|
||||||
del relations[:]
|
del relations[:]
|
||||||
selectors.append(sel)
|
selectors.append(sel)
|
||||||
|
@ -1112,7 +1098,7 @@ class CSSParser:
|
||||||
# Return selector list
|
# Return selector list
|
||||||
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
||||||
|
|
||||||
def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]:
|
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
|
||||||
"""Iterate selector tokens."""
|
"""Iterate selector tokens."""
|
||||||
|
|
||||||
# Ignore whitespace and comments at start and end of pattern
|
# Ignore whitespace and comments at start and end of pattern
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
"""CSS selector structure items."""
|
"""CSS selector structure items."""
|
||||||
|
from __future__ import annotations
|
||||||
import copyreg
|
import copyreg
|
||||||
from .pretty import pretty
|
from .pretty import pretty
|
||||||
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
|
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'Selector',
|
'Selector',
|
||||||
|
@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400
|
||||||
class Immutable:
|
class Immutable:
|
||||||
"""Immutable."""
|
"""Immutable."""
|
||||||
|
|
||||||
__slots__: Tuple[str, ...] = ('_hash',)
|
__slots__: tuple[str, ...] = ('_hash',)
|
||||||
|
|
||||||
_hash: int
|
_hash: int
|
||||||
|
|
||||||
|
@ -48,7 +49,7 @@ class Immutable:
|
||||||
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
|
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __base__(cls) -> "Type[Immutable]":
|
def __base__(cls) -> "type[Immutable]":
|
||||||
"""Get base class."""
|
"""Get base class."""
|
||||||
|
|
||||||
return cls
|
return cls
|
||||||
|
@ -99,7 +100,7 @@ class ImmutableDict(Mapping[Any, Any]):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
|
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
|
@ -107,7 +108,7 @@ class ImmutableDict(Mapping[Any, Any]):
|
||||||
self._d = dict(arg)
|
self._d = dict(arg)
|
||||||
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
||||||
|
|
||||||
def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
|
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
|
||||||
"""Validate arguments."""
|
"""Validate arguments."""
|
||||||
|
|
||||||
if isinstance(arg, dict):
|
if isinstance(arg, dict):
|
||||||
|
@ -147,12 +148,12 @@ class ImmutableDict(Mapping[Any, Any]):
|
||||||
class Namespaces(ImmutableDict):
|
class Namespaces(ImmutableDict):
|
||||||
"""Namespaces."""
|
"""Namespaces."""
|
||||||
|
|
||||||
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
|
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super().__init__(arg)
|
super().__init__(arg)
|
||||||
|
|
||||||
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
|
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Validate arguments."""
|
"""Validate arguments."""
|
||||||
|
|
||||||
if isinstance(arg, dict):
|
if isinstance(arg, dict):
|
||||||
|
@ -165,12 +166,12 @@ class Namespaces(ImmutableDict):
|
||||||
class CustomSelectors(ImmutableDict):
|
class CustomSelectors(ImmutableDict):
|
||||||
"""Custom selectors."""
|
"""Custom selectors."""
|
||||||
|
|
||||||
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
|
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super().__init__(arg)
|
super().__init__(arg)
|
||||||
|
|
||||||
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
|
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Validate arguments."""
|
"""Validate arguments."""
|
||||||
|
|
||||||
if isinstance(arg, dict):
|
if isinstance(arg, dict):
|
||||||
|
@ -188,30 +189,30 @@ class Selector(Immutable):
|
||||||
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
|
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
|
||||||
)
|
)
|
||||||
|
|
||||||
tag: Optional['SelectorTag']
|
tag: Optional[SelectorTag]
|
||||||
ids: Tuple[str, ...]
|
ids: tuple[str, ...]
|
||||||
classes: Tuple[str, ...]
|
classes: tuple[str, ...]
|
||||||
attributes: Tuple['SelectorAttribute', ...]
|
attributes: tuple[SelectorAttribute, ...]
|
||||||
nth: Tuple['SelectorNth', ...]
|
nth: tuple[SelectorNth, ...]
|
||||||
selectors: Tuple['SelectorList', ...]
|
selectors: tuple[SelectorList, ...]
|
||||||
relation: 'SelectorList'
|
relation: SelectorList
|
||||||
rel_type: Optional[str]
|
rel_type: Optional[str]
|
||||||
contains: Tuple['SelectorContains', ...]
|
contains: tuple[SelectorContains, ...]
|
||||||
lang: Tuple['SelectorLang', ...]
|
lang: tuple[SelectorLang, ...]
|
||||||
flags: int
|
flags: int
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tag: Optional['SelectorTag'],
|
tag: Optional[SelectorTag],
|
||||||
ids: Tuple[str, ...],
|
ids: tuple[str, ...],
|
||||||
classes: Tuple[str, ...],
|
classes: tuple[str, ...],
|
||||||
attributes: Tuple['SelectorAttribute', ...],
|
attributes: tuple[SelectorAttribute, ...],
|
||||||
nth: Tuple['SelectorNth', ...],
|
nth: tuple[SelectorNth, ...],
|
||||||
selectors: Tuple['SelectorList', ...],
|
selectors: tuple[SelectorList, ...],
|
||||||
relation: 'SelectorList',
|
relation: SelectorList,
|
||||||
rel_type: Optional[str],
|
rel_type: Optional[str],
|
||||||
contains: Tuple['SelectorContains', ...],
|
contains: tuple[SelectorContains, ...],
|
||||||
lang: Tuple['SelectorLang', ...],
|
lang: tuple[SelectorLang, ...],
|
||||||
flags: int
|
flags: int
|
||||||
):
|
):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
@ -286,7 +287,7 @@ class SelectorContains(Immutable):
|
||||||
|
|
||||||
__slots__ = ("text", "own", "_hash")
|
__slots__ = ("text", "own", "_hash")
|
||||||
|
|
||||||
text: Tuple[str, ...]
|
text: tuple[str, ...]
|
||||||
own: bool
|
own: bool
|
||||||
|
|
||||||
def __init__(self, text: Iterable[str], own: bool) -> None:
|
def __init__(self, text: Iterable[str], own: bool) -> None:
|
||||||
|
@ -305,9 +306,9 @@ class SelectorNth(Immutable):
|
||||||
b: int
|
b: int
|
||||||
of_type: bool
|
of_type: bool
|
||||||
last: bool
|
last: bool
|
||||||
selectors: 'SelectorList'
|
selectors: SelectorList
|
||||||
|
|
||||||
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
|
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super().__init__(
|
super().__init__(
|
||||||
|
@ -325,7 +326,7 @@ class SelectorLang(Immutable):
|
||||||
|
|
||||||
__slots__ = ("languages", "_hash",)
|
__slots__ = ("languages", "_hash",)
|
||||||
|
|
||||||
languages: Tuple[str, ...]
|
languages: tuple[str, ...]
|
||||||
|
|
||||||
def __init__(self, languages: Iterable[str]):
|
def __init__(self, languages: Iterable[str]):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
@ -353,13 +354,13 @@ class SelectorList(Immutable):
|
||||||
|
|
||||||
__slots__ = ("selectors", "is_not", "is_html", "_hash")
|
__slots__ = ("selectors", "is_not", "is_html", "_hash")
|
||||||
|
|
||||||
selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
|
selectors: tuple[Selector | SelectorNull, ...]
|
||||||
is_not: bool
|
is_not: bool
|
||||||
is_html: bool
|
is_html: bool
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
|
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
|
||||||
is_not: bool = False,
|
is_not: bool = False,
|
||||||
is_html: bool = False
|
is_html: bool = False
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -371,7 +372,7 @@ class SelectorList(Immutable):
|
||||||
is_html=is_html
|
is_html=is_html
|
||||||
)
|
)
|
||||||
|
|
||||||
def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
|
def __iter__(self) -> Iterator[Selector | SelectorNull]:
|
||||||
"""Iterator."""
|
"""Iterator."""
|
||||||
|
|
||||||
return iter(self.selectors)
|
return iter(self.selectors)
|
||||||
|
@ -381,7 +382,7 @@ class SelectorList(Immutable):
|
||||||
|
|
||||||
return len(self.selectors)
|
return len(self.selectors)
|
||||||
|
|
||||||
def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
|
def __getitem__(self, index: int) -> Selector | SelectorNull:
|
||||||
"""Get item."""
|
"""Get item."""
|
||||||
|
|
||||||
return self.selectors[index]
|
return self.selectors[index]
|
||||||
|
|
|
@ -65,6 +65,7 @@ SelectorList(
|
||||||
is_html=False)
|
is_html=False)
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
"""Utility."""
|
"""Utility."""
|
||||||
|
from __future__ import annotations
|
||||||
from functools import wraps, lru_cache
|
from functools import wraps, lru_cache
|
||||||
import warnings
|
import warnings
|
||||||
import re
|
import re
|
||||||
from typing import Callable, Any, Optional, Tuple, List
|
from typing import Callable, Any, Optional
|
||||||
|
|
||||||
DEBUG = 0x00001
|
DEBUG = 0x00001
|
||||||
|
|
||||||
|
@ -75,13 +76,13 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
|
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
|
||||||
"""Get the pattern context."""
|
"""Get the pattern context."""
|
||||||
|
|
||||||
last = 0
|
last = 0
|
||||||
current_line = 1
|
current_line = 1
|
||||||
col = 1
|
col = 1
|
||||||
text = [] # type: List[str]
|
text = [] # type: list[str]
|
||||||
line = 1
|
line = 1
|
||||||
offset = None # type: Optional[int]
|
offset = None # type: Optional[int]
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ arrow==1.2.3
|
||||||
backports.csv==1.0.7
|
backports.csv==1.0.7
|
||||||
backports.functools-lru-cache==1.6.4
|
backports.functools-lru-cache==1.6.4
|
||||||
backports.zoneinfo==0.2.1;python_version<"3.9"
|
backports.zoneinfo==0.2.1;python_version<"3.9"
|
||||||
beautifulsoup4==4.11.1
|
beautifulsoup4==4.11.2
|
||||||
bleach==6.0.0
|
bleach==6.0.0
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
cheroot==9.0.0
|
cheroot==9.0.0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue