mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-08-14 02:26:58 -07:00
Bump beautifulsoup4 from 4.11.2 to 4.12.2 (#2037)
* Bump beautifulsoup4 from 4.11.2 to 4.12.2 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.2 to 4.12.2. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update beautifulsoup4==4.12.2 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
This commit is contained in:
parent
1798594569
commit
e70e08c3f5
32 changed files with 1439 additions and 755 deletions
|
@ -8,14 +8,8 @@ except ImportError as e:
|
|||
import re
|
||||
import sys
|
||||
import warnings
|
||||
try:
|
||||
import soupsieve
|
||||
except ImportError as e:
|
||||
soupsieve = None
|
||||
warnings.warn(
|
||||
'The soupsieve package is not installed. CSS selectors cannot be used.'
|
||||
)
|
||||
|
||||
from bs4.css import CSS
|
||||
from bs4.formatter import (
|
||||
Formatter,
|
||||
HTMLFormatter,
|
||||
|
@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
|
|||
"string-escape",
|
||||
"string_escape",
|
||||
])
|
||||
|
||||
|
||||
|
||||
class NamespacedAttribute(str):
|
||||
"""A namespaced string (e.g. 'xml:lang') that remembers the namespace
|
||||
('xml') and the name ('lang') that were used to create it.
|
||||
"""
|
||||
|
||||
|
||||
def __new__(cls, prefix, name=None, namespace=None):
|
||||
if not name:
|
||||
# This is the default namespace. Its name "has no value"
|
||||
|
@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
|
|||
return match.group(1) + encoding
|
||||
return self.CHARSET_RE.sub(rewrite, self.original_value)
|
||||
|
||||
|
||||
|
||||
class PageElement(object):
|
||||
"""Contains the navigational information for some part of the page:
|
||||
that is, its current location in the parse tree.
|
||||
|
||||
NavigableString, Tag, etc. are all subclasses of PageElement.
|
||||
"""
|
||||
|
||||
|
||||
# In general, we can't tell just by looking at an element whether
|
||||
# it's contained in an XML document or an HTML document. But for
|
||||
# Tags (q.v.) we can store this information at parse time.
|
||||
known_xml = None
|
||||
|
||||
def setup(self, parent=None, previous_element=None, next_element=None,
|
||||
previous_sibling=None, next_sibling=None):
|
||||
"""Sets up the initial relations between this element and
|
||||
|
@ -163,7 +162,7 @@ class PageElement(object):
|
|||
|
||||
:param previous_element: The element parsed immediately before
|
||||
this one.
|
||||
|
||||
|
||||
:param next_element: The element parsed immediately before
|
||||
this one.
|
||||
|
||||
|
@ -257,11 +256,11 @@ class PageElement(object):
|
|||
default = object()
|
||||
def _all_strings(self, strip=False, types=default):
|
||||
"""Yield all strings of certain classes, possibly stripping them.
|
||||
|
||||
|
||||
This is implemented differently in Tag and NavigableString.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@property
|
||||
def stripped_strings(self):
|
||||
"""Yield all strings in this PageElement, stripping them first.
|
||||
|
@ -294,11 +293,11 @@ class PageElement(object):
|
|||
strip, types=types)])
|
||||
getText = get_text
|
||||
text = property(get_text)
|
||||
|
||||
|
||||
def replace_with(self, *args):
|
||||
"""Replace this PageElement with one or more PageElements, keeping the
|
||||
"""Replace this PageElement with one or more PageElements, keeping the
|
||||
rest of the tree the same.
|
||||
|
||||
|
||||
:param args: One or more PageElements.
|
||||
:return: `self`, no longer part of the tree.
|
||||
"""
|
||||
|
@ -410,7 +409,7 @@ class PageElement(object):
|
|||
This works the same way as `list.insert`.
|
||||
|
||||
:param position: The numeric position that should be occupied
|
||||
in `self.children` by the new PageElement.
|
||||
in `self.children` by the new PageElement.
|
||||
:param new_child: A PageElement.
|
||||
"""
|
||||
if new_child is None:
|
||||
|
@ -546,7 +545,7 @@ class PageElement(object):
|
|||
"Element has no parent, so 'after' has no meaning.")
|
||||
if any(x is self for x in args):
|
||||
raise ValueError("Can't insert an element after itself.")
|
||||
|
||||
|
||||
offset = 0
|
||||
for successor in args:
|
||||
# Extract first so that the index won't be screwed up if they
|
||||
|
@ -912,7 +911,7 @@ class PageElement(object):
|
|||
:rtype: bool
|
||||
"""
|
||||
return getattr(self, '_decomposed', False) or False
|
||||
|
||||
|
||||
# Old non-property versions of the generators, for backwards
|
||||
# compatibility with BS3.
|
||||
def nextGenerator(self):
|
||||
|
@ -936,16 +935,11 @@ class NavigableString(str, PageElement):
|
|||
|
||||
When Beautiful Soup parses the markup <b>penguin</b>, it will
|
||||
create a NavigableString for the string "penguin".
|
||||
"""
|
||||
"""
|
||||
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
|
@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
|
|||
u.setup()
|
||||
return u
|
||||
|
||||
def __copy__(self):
|
||||
def __deepcopy__(self, memo, recursive=False):
|
||||
"""A copy of a NavigableString has the same contents and class
|
||||
as the original, but it is not connected to the parse tree.
|
||||
|
||||
:param recursive: This parameter is ignored; it's only defined
|
||||
so that NavigableString.__deepcopy__ implements the same
|
||||
signature as Tag.__deepcopy__.
|
||||
"""
|
||||
return type(self)(self)
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a NavigableString can only be a deep copy, because
|
||||
only one PageElement can occupy a given place in a parse tree.
|
||||
"""
|
||||
return self.__deepcopy__({})
|
||||
|
||||
def __getnewargs__(self):
|
||||
return (str(self),)
|
||||
|
||||
|
@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString):
|
|||
as comments (the Comment class) and CDATA blocks (the CData
|
||||
class).
|
||||
"""
|
||||
|
||||
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
|
||||
def output_ready(self, formatter=None):
|
||||
"""Make this string ready for output by adding any subclass-specific
|
||||
prefix or suffix.
|
||||
|
@ -1144,7 +1148,7 @@ class Stylesheet(NavigableString):
|
|||
"""
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Script(NavigableString):
|
||||
"""A NavigableString representing an executable script (probably
|
||||
Javascript).
|
||||
|
@ -1250,7 +1254,7 @@ class Tag(PageElement):
|
|||
if ((not builder or builder.store_line_numbers)
|
||||
and (sourceline is not None or sourcepos is not None)):
|
||||
self.sourceline = sourceline
|
||||
self.sourcepos = sourcepos
|
||||
self.sourcepos = sourcepos
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
|
@ -1308,13 +1312,49 @@ class Tag(PageElement):
|
|||
self.interesting_string_types = builder.string_containers[self.name]
|
||||
else:
|
||||
self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
|
||||
|
||||
|
||||
parserClass = _alias("parser_class") # BS3
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
|
||||
def __deepcopy__(self, memo, recursive=True):
|
||||
"""A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = self._clone()
|
||||
|
||||
if recursive:
|
||||
# Clone this tag's descendants recursively, but without
|
||||
# making any recursive function calls.
|
||||
tag_stack = [clone]
|
||||
for event, element in self._event_stream(self.descendants):
|
||||
if event is Tag.END_ELEMENT_EVENT:
|
||||
# Stop appending incoming Tags to the Tag that was
|
||||
# just closed.
|
||||
tag_stack.pop()
|
||||
else:
|
||||
descendant_clone = element.__deepcopy__(
|
||||
memo, recursive=False
|
||||
)
|
||||
# Add to its parent's .contents
|
||||
tag_stack[-1].append(descendant_clone)
|
||||
|
||||
if event is Tag.START_ELEMENT_EVENT:
|
||||
# Add the Tag itself to the stack so that its
|
||||
# children will be .appended to it.
|
||||
tag_stack.append(descendant_clone)
|
||||
return clone
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a Tag must always be a deep copy, because a Tag's
|
||||
children can only have one parent at a time.
|
||||
"""
|
||||
return self.__deepcopy__({})
|
||||
|
||||
def _clone(self):
|
||||
"""Create a new Tag just like this one, but with no
|
||||
contents and unattached to any parse tree.
|
||||
|
||||
This is the first step in the deepcopy process.
|
||||
"""
|
||||
clone = type(self)(
|
||||
None, self.builder, self.name, self.namespace,
|
||||
self.prefix, self.attrs, is_xml=self._is_xml,
|
||||
|
@ -1326,8 +1366,6 @@ class Tag(PageElement):
|
|||
)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
clone.append(child.__copy__())
|
||||
return clone
|
||||
|
||||
@property
|
||||
|
@ -1433,7 +1471,7 @@ class Tag(PageElement):
|
|||
i.contents = []
|
||||
i._decomposed = True
|
||||
i = n
|
||||
|
||||
|
||||
def clear(self, decompose=False):
|
||||
"""Wipe out all children of this PageElement by calling extract()
|
||||
on them.
|
||||
|
@ -1521,7 +1559,7 @@ class Tag(PageElement):
|
|||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
|
||||
def has_attr(self, key):
|
||||
"""Does this PageElement have an attribute with the given name?"""
|
||||
return key in self.attrs
|
||||
|
@ -1608,7 +1646,7 @@ class Tag(PageElement):
|
|||
def __repr__(self, encoding="unicode-escape"):
|
||||
"""Renders this PageElement as a string.
|
||||
|
||||
:param encoding: The encoding to use (Python 2 only).
|
||||
:param encoding: The encoding to use (Python 2 only).
|
||||
TODO: This is now ignored and a warning should be issued
|
||||
if a value is provided.
|
||||
:return: A (Unicode) string.
|
||||
|
@ -1650,106 +1688,212 @@ class Tag(PageElement):
|
|||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
"""Render a Unicode representation of this PageElement and its
|
||||
contents.
|
||||
|
||||
:param indent_level: Each line of the rendering will be
|
||||
indented this many spaces. Used internally in
|
||||
recursive calls while pretty-printing.
|
||||
:param eventual_encoding: The tag is destined to be
|
||||
encoded into this encoding. This method is _not_
|
||||
responsible for performing that encoding. This information
|
||||
is passed in so that it can be substituted in if the
|
||||
document contains a <META> tag that mentions the document's
|
||||
encoding.
|
||||
:param formatter: A Formatter object, or a string naming one of
|
||||
the standard formatters.
|
||||
"""
|
||||
|
||||
formatter="minimal",
|
||||
iterator=None):
|
||||
pieces = []
|
||||
# First off, turn a non-Formatter `formatter` into a Formatter
|
||||
# object. This will stop the lookup from happening over and
|
||||
# over again.
|
||||
if not isinstance(formatter, Formatter):
|
||||
formatter = self.formatter_for_name(formatter)
|
||||
attributes = formatter.attributes(self)
|
||||
attrs = []
|
||||
for key, val in attributes:
|
||||
if val is None:
|
||||
decoded = key
|
||||
|
||||
if indent_level is True:
|
||||
indent_level = 0
|
||||
|
||||
# The currently active tag that put us into string literal
|
||||
# mode. Until this element is closed, children will be treated
|
||||
# as string literals and not pretty-printed. String literal
|
||||
# mode is turned on immediately after this tag begins, and
|
||||
# turned off immediately before it's closed. This means there
|
||||
# will be whitespace before and after the tag itself.
|
||||
string_literal_tag = None
|
||||
|
||||
for event, element in self._event_stream(iterator):
|
||||
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
|
||||
piece = element._format_tag(
|
||||
eventual_encoding, formatter, opening=True
|
||||
)
|
||||
elif event is Tag.END_ELEMENT_EVENT:
|
||||
piece = element._format_tag(
|
||||
eventual_encoding, formatter, opening=False
|
||||
)
|
||||
if indent_level is not None:
|
||||
indent_level -= 1
|
||||
else:
|
||||
if isinstance(val, list) or isinstance(val, tuple):
|
||||
val = ' '.join(val)
|
||||
elif not isinstance(val, str):
|
||||
val = str(val)
|
||||
elif (
|
||||
isinstance(val, AttributeValueWithCharsetSubstitution)
|
||||
and eventual_encoding is not None
|
||||
):
|
||||
val = val.encode(eventual_encoding)
|
||||
piece = element.output_ready(formatter)
|
||||
|
||||
text = formatter.attribute_value(val)
|
||||
decoded = (
|
||||
str(key) + '='
|
||||
+ formatter.quoted_attribute_value(text))
|
||||
attrs.append(decoded)
|
||||
close = ''
|
||||
closeTag = ''
|
||||
# Now we need to apply the 'prettiness' -- extra
|
||||
# whitespace before and/or after this tag. This can get
|
||||
# complicated because certain tags, like <pre> and
|
||||
# <script>, can't be prettified, since adding whitespace would
|
||||
# change the meaning of the content.
|
||||
|
||||
# The default behavior is to add whitespace before and
|
||||
# after an element when string literal mode is off, and to
|
||||
# leave things as they are when string literal mode is on.
|
||||
if string_literal_tag:
|
||||
indent_before = indent_after = False
|
||||
else:
|
||||
indent_before = indent_after = True
|
||||
|
||||
# The only time the behavior is more complex than that is
|
||||
# when we encounter an opening or closing tag that might
|
||||
# put us into or out of string literal mode.
|
||||
if (event is Tag.START_ELEMENT_EVENT
|
||||
and not string_literal_tag
|
||||
and not element._should_pretty_print()):
|
||||
# We are about to enter string literal mode. Add
|
||||
# whitespace before this tag, but not after. We
|
||||
# will stay in string literal mode until this tag
|
||||
# is closed.
|
||||
indent_before = True
|
||||
indent_after = False
|
||||
string_literal_tag = element
|
||||
elif (event is Tag.END_ELEMENT_EVENT
|
||||
and element is string_literal_tag):
|
||||
# We are about to exit string literal mode by closing
|
||||
# the tag that sent us into that mode. Add whitespace
|
||||
# after this tag, but not before.
|
||||
indent_before = False
|
||||
indent_after = True
|
||||
string_literal_tag = None
|
||||
|
||||
# Now we know whether to add whitespace before and/or
|
||||
# after this element.
|
||||
if indent_level is not None:
|
||||
if (indent_before or indent_after):
|
||||
if isinstance(element, NavigableString):
|
||||
piece = piece.strip()
|
||||
if piece:
|
||||
piece = self._indent_string(
|
||||
piece, indent_level, formatter,
|
||||
indent_before, indent_after
|
||||
)
|
||||
if event == Tag.START_ELEMENT_EVENT:
|
||||
indent_level += 1
|
||||
pieces.append(piece)
|
||||
return "".join(pieces)
|
||||
|
||||
# Names for the different events yielded by _event_stream
|
||||
START_ELEMENT_EVENT = object()
|
||||
END_ELEMENT_EVENT = object()
|
||||
EMPTY_ELEMENT_EVENT = object()
|
||||
STRING_ELEMENT_EVENT = object()
|
||||
|
||||
def _event_stream(self, iterator=None):
|
||||
"""Yield a sequence of events that can be used to reconstruct the DOM
|
||||
for this element.
|
||||
|
||||
This lets us recreate the nested structure of this element
|
||||
(e.g. when formatting it as a string) without using recursive
|
||||
method calls.
|
||||
|
||||
This is similar in concept to the SAX API, but it's a simpler
|
||||
interface designed for internal use. The events are different
|
||||
from SAX and the arguments associated with the events are Tags
|
||||
and other Beautiful Soup objects.
|
||||
|
||||
:param iterator: An alternate iterator to use when traversing
|
||||
the tree.
|
||||
"""
|
||||
tag_stack = []
|
||||
|
||||
iterator = iterator or self.self_and_descendants
|
||||
|
||||
for c in iterator:
|
||||
# If the parent of the element we're about to yield is not
|
||||
# the tag currently on the stack, it means that the tag on
|
||||
# the stack closed before this element appeared.
|
||||
while tag_stack and c.parent != tag_stack[-1]:
|
||||
now_closed_tag = tag_stack.pop()
|
||||
yield Tag.END_ELEMENT_EVENT, now_closed_tag
|
||||
|
||||
if isinstance(c, Tag):
|
||||
if c.is_empty_element:
|
||||
yield Tag.EMPTY_ELEMENT_EVENT, c
|
||||
else:
|
||||
yield Tag.START_ELEMENT_EVENT, c
|
||||
tag_stack.append(c)
|
||||
continue
|
||||
else:
|
||||
yield Tag.STRING_ELEMENT_EVENT, c
|
||||
|
||||
while tag_stack:
|
||||
now_closed_tag = tag_stack.pop()
|
||||
yield Tag.END_ELEMENT_EVENT, now_closed_tag
|
||||
|
||||
def _indent_string(self, s, indent_level, formatter,
|
||||
indent_before, indent_after):
|
||||
"""Add indentation whitespace before and/or after a string.
|
||||
|
||||
:param s: The string to amend with whitespace.
|
||||
:param indent_level: The indentation level; affects how much
|
||||
whitespace goes before the string.
|
||||
:param indent_before: Whether or not to add whitespace
|
||||
before the string.
|
||||
:param indent_after: Whether or not to add whitespace
|
||||
(a newline) after the string.
|
||||
"""
|
||||
space_before = ''
|
||||
if indent_before and indent_level:
|
||||
space_before = (formatter.indent * indent_level)
|
||||
|
||||
space_after = ''
|
||||
if indent_after:
|
||||
space_after = "\n"
|
||||
|
||||
return space_before + s + space_after
|
||||
|
||||
def _format_tag(self, eventual_encoding, formatter, opening):
|
||||
# A tag starts with the < character (see below).
|
||||
|
||||
# Then the / character, if this is a closing tag.
|
||||
closing_slash = ''
|
||||
if not opening:
|
||||
closing_slash = '/'
|
||||
|
||||
# Then an optional namespace prefix.
|
||||
prefix = ''
|
||||
if self.prefix:
|
||||
prefix = self.prefix + ":"
|
||||
|
||||
if self.is_empty_element:
|
||||
close = formatter.void_element_close_prefix or ''
|
||||
else:
|
||||
closeTag = '</%s%s>' % (prefix, self.name)
|
||||
# Then a list of attribute values, if this is an opening tag.
|
||||
attribute_string = ''
|
||||
if opening:
|
||||
attributes = formatter.attributes(self)
|
||||
attrs = []
|
||||
for key, val in attributes:
|
||||
if val is None:
|
||||
decoded = key
|
||||
else:
|
||||
if isinstance(val, list) or isinstance(val, tuple):
|
||||
val = ' '.join(val)
|
||||
elif not isinstance(val, str):
|
||||
val = str(val)
|
||||
elif (
|
||||
isinstance(val, AttributeValueWithCharsetSubstitution)
|
||||
and eventual_encoding is not None
|
||||
):
|
||||
val = val.encode(eventual_encoding)
|
||||
|
||||
pretty_print = self._should_pretty_print(indent_level)
|
||||
space = ''
|
||||
indent_space = ''
|
||||
if indent_level is not None:
|
||||
indent_space = (formatter.indent * (indent_level - 1))
|
||||
if pretty_print:
|
||||
space = indent_space
|
||||
indent_contents = indent_level + 1
|
||||
else:
|
||||
indent_contents = None
|
||||
contents = self.decode_contents(
|
||||
indent_contents, eventual_encoding, formatter
|
||||
)
|
||||
|
||||
if self.hidden:
|
||||
# This is the 'document root' object.
|
||||
s = contents
|
||||
else:
|
||||
s = []
|
||||
attribute_string = ''
|
||||
text = formatter.attribute_value(val)
|
||||
decoded = (
|
||||
str(key) + '='
|
||||
+ formatter.quoted_attribute_value(text))
|
||||
attrs.append(decoded)
|
||||
if attrs:
|
||||
attribute_string = ' ' + ' '.join(attrs)
|
||||
if indent_level is not None:
|
||||
# Even if this particular tag is not pretty-printed,
|
||||
# we should indent up to the start of the tag.
|
||||
s.append(indent_space)
|
||||
s.append('<%s%s%s%s>' % (
|
||||
prefix, self.name, attribute_string, close))
|
||||
if pretty_print:
|
||||
s.append("\n")
|
||||
s.append(contents)
|
||||
if pretty_print and contents and contents[-1] != "\n":
|
||||
s.append("\n")
|
||||
if pretty_print and closeTag:
|
||||
s.append(space)
|
||||
s.append(closeTag)
|
||||
if indent_level is not None and closeTag and self.next_sibling:
|
||||
# Even if this particular tag is not pretty-printed,
|
||||
# we're now done with the tag, and we should add a
|
||||
# newline if appropriate.
|
||||
s.append("\n")
|
||||
s = ''.join(s)
|
||||
return s
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
# Then an optional closing slash (for a void element in an
|
||||
# XML document).
|
||||
void_element_closing_slash = ''
|
||||
if self.is_empty_element:
|
||||
void_element_closing_slash = formatter.void_element_close_prefix or ''
|
||||
|
||||
# Put it all together.
|
||||
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
|
||||
|
||||
def _should_pretty_print(self, indent_level=1):
|
||||
"""Should this tag be pretty-printed?
|
||||
|
||||
Most of them should, but some (such as <pre> in HTML
|
||||
|
@ -1770,7 +1914,7 @@ class Tag(PageElement):
|
|||
a Unicode string will be returned.
|
||||
:param formatter: A Formatter object, or a string naming one of
|
||||
the standard formatters.
|
||||
:return: A Unicode string (if encoding==None) or a bytestring
|
||||
:return: A Unicode string (if encoding==None) or a bytestring
|
||||
(otherwise).
|
||||
"""
|
||||
if encoding is None:
|
||||
|
@ -1800,33 +1944,9 @@ class Tag(PageElement):
|
|||
the standard Formatters.
|
||||
|
||||
"""
|
||||
# First off, turn a string formatter into a Formatter object. This
|
||||
# will stop the lookup from happening over and over again.
|
||||
if not isinstance(formatter, Formatter):
|
||||
formatter = self.formatter_for_name(formatter)
|
||||
return self.decode(indent_level, eventual_encoding, formatter,
|
||||
iterator=self.descendants)
|
||||
|
||||
pretty_print = (indent_level is not None)
|
||||
s = []
|
||||
for c in self:
|
||||
text = None
|
||||
if isinstance(c, NavigableString):
|
||||
text = c.output_ready(formatter)
|
||||
elif isinstance(c, Tag):
|
||||
s.append(c.decode(indent_level, eventual_encoding,
|
||||
formatter))
|
||||
preserve_whitespace = (
|
||||
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
|
||||
)
|
||||
if text and indent_level and not preserve_whitespace:
|
||||
text = text.strip()
|
||||
if text:
|
||||
if pretty_print and not preserve_whitespace:
|
||||
s.append(formatter.indent * (indent_level - 1))
|
||||
s.append(text)
|
||||
if pretty_print and not preserve_whitespace:
|
||||
s.append("\n")
|
||||
return ''.join(s)
|
||||
|
||||
def encode_contents(
|
||||
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
|
@ -1922,6 +2042,18 @@ class Tag(PageElement):
|
|||
# return iter() to make the purpose of the method clear
|
||||
return iter(self.contents) # XXX This seems to be untested.
|
||||
|
||||
@property
|
||||
def self_and_descendants(self):
|
||||
"""Iterate over this PageElement and its children in a
|
||||
breadth-first sequence.
|
||||
|
||||
:yield: A sequence of PageElements.
|
||||
"""
|
||||
if not self.hidden:
|
||||
yield self
|
||||
for i in self.descendants:
|
||||
yield i
|
||||
|
||||
@property
|
||||
def descendants(self):
|
||||
"""Iterate over all children of this PageElement in a
|
||||
|
@ -1948,16 +2080,13 @@ class Tag(PageElement):
|
|||
Beautiful Soup will use the prefixes it encountered while
|
||||
parsing the document.
|
||||
|
||||
:param kwargs: Keyword arguments to be passed into SoupSieve's
|
||||
:param kwargs: Keyword arguments to be passed into Soup Sieve's
|
||||
soupsieve.select() method.
|
||||
|
||||
:return: A Tag.
|
||||
:rtype: bs4.element.Tag
|
||||
"""
|
||||
value = self.select(selector, namespaces, 1, **kwargs)
|
||||
if value:
|
||||
return value[0]
|
||||
return None
|
||||
return self.css.select_one(selector, namespaces, **kwargs)
|
||||
|
||||
def select(self, selector, namespaces=None, limit=None, **kwargs):
|
||||
"""Perform a CSS selection operation on the current element.
|
||||
|
@ -1973,27 +2102,18 @@ class Tag(PageElement):
|
|||
|
||||
:param limit: After finding this number of results, stop looking.
|
||||
|
||||
:param kwargs: Keyword arguments to be passed into SoupSieve's
|
||||
:param kwargs: Keyword arguments to be passed into SoupSieve's
|
||||
soupsieve.select() method.
|
||||
|
||||
:return: A ResultSet of Tags.
|
||||
:rtype: bs4.element.ResultSet
|
||||
"""
|
||||
if namespaces is None:
|
||||
namespaces = self._namespaces
|
||||
|
||||
if limit is None:
|
||||
limit = 0
|
||||
if soupsieve is None:
|
||||
raise NotImplementedError(
|
||||
"Cannot execute CSS selectors because the soupsieve package is not installed."
|
||||
)
|
||||
|
||||
results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
|
||||
return self.css.select(selector, namespaces, limit, **kwargs)
|
||||
|
||||
# We do this because it's more consistent and because
|
||||
# ResultSet.__getattr__ has a helpful error message.
|
||||
return ResultSet(None, results)
|
||||
@property
|
||||
def css(self):
|
||||
"""Return an interface to the CSS selector API."""
|
||||
return CSS(self)
|
||||
|
||||
# Old names for backwards compatibility
|
||||
def childGenerator(self):
|
||||
|
@ -2038,7 +2158,7 @@ class SoupStrainer(object):
|
|||
:param attrs: A dictionary of filters on attribute values.
|
||||
:param string: A filter for a NavigableString with specific text.
|
||||
:kwargs: A dictionary of filters on attribute values.
|
||||
"""
|
||||
"""
|
||||
if string is None and 'text' in kwargs:
|
||||
string = kwargs.pop('text')
|
||||
warnings.warn(
|
||||
|
@ -2137,7 +2257,7 @@ class SoupStrainer(object):
|
|||
# looking at a tag with a different name.
|
||||
if markup and not markup.prefix and self.name != markup.name:
|
||||
return False
|
||||
|
||||
|
||||
call_function_with_tag_data = (
|
||||
isinstance(self.name, Callable)
|
||||
and not isinstance(markup_name, Tag))
|
||||
|
@ -2223,7 +2343,7 @@ class SoupStrainer(object):
|
|||
if self._matches(' '.join(markup), match_against):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
if match_against is True:
|
||||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
|
@ -2267,11 +2387,11 @@ class SoupStrainer(object):
|
|||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
# Beyond this point we might need to run the test twice: once against
|
||||
# the tag's name and once against its prefixed name.
|
||||
match = False
|
||||
|
||||
|
||||
if not match and isinstance(match_against, str):
|
||||
# Exact string match
|
||||
match = markup == match_against
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue