Bump beautifulsoup4 from 4.11.2 to 4.12.2 (#2037)

* Bump beautifulsoup4 from 4.11.2 to 4.12.2 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.2 to 4.12.2. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update beautifulsoup4==4.12.2 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
2025-08-20 13:23:24 -07:00 · 2023-08-23 21:38:49 -07:00 · 2023-08-23 21:38:49 -07:00 · e70e08c3f5
commit e70e08c3f5
parent 1798594569
32 changed files with 1439 additions and 755 deletions
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.2"
+__version__ = "4.12.2"
 __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@ -38,11 +38,13 @@ from .builder import (
    builder_registry,
    ParserRejectedMarkup,
    XMLParsedAsHTMLWarning,
    HTMLParserTreeBuilder
 )
 from .dammit import UnicodeDammit
 from .element import (
    CData,
    Comment,
    CSS,
    DEFAULT_OUTPUT_ENCODING,
    Declaration,
    Doctype,
@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-    
+   
    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 element_classes=None, **kwargs):
@ -348,25 +350,49 @@ class BeautifulSoup(Tag):
        self.markup = None
        self.builder.soup = None
-    def __copy__(self):
+    def _clone(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
+        """Create a new BeautifulSoup object with the same TreeBuilder,
-        copy = type(self)(
+        but not associated with any markup.
            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
        )
-        # Although we encoded the tree to UTF-8, that may not have
+        This is the first step of the deepcopy process.
-        # been the encoding of the original markup. Set the copy's
+        """
-        # .original_encoding to reflect the original object's
+        clone = type(self)("", None, self.builder)
        # .original_encoding.
        copy.original_encoding = self.original_encoding
        return copy
        # Keep track of the encoding of the original document,
        # since we won't be parsing it again.
        clone.original_encoding = self.original_encoding
        return clone
    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            d['builder'] = None
+            d['builder'] = type(self.builder)
        # Store the contents as a Unicode string.
        d['contents'] = []
        d['markup'] = self.decode()
        # If _most_recent_element is present, it's a Tag object left
        # over from initial parse. It might not be picklable and we
        # don't need it.
        if '_most_recent_element' in d:
            del d['_most_recent_element']
        return d
    def __setstate__(self, state):
        """Restore this object from the dictionary made by __getstate__.

        :param state: The dictionary to install as this object's __dict__.
        :return: The same `state` dictionary that was passed in.
        """
        self.__dict__ = state
        stored_builder = self.builder
        if isinstance(stored_builder, type):
            # Only the TreeBuilder class was stored, not an instance;
            # instantiate it now.
            self.builder = stored_builder()
        elif not stored_builder:
            # Nothing tells us which builder made this parse tree, so
            # fall back on one we know is always available.
            self.builder = HTMLParserTreeBuilder()
        self.builder.soup = self
        # __getstate__ emptied 'contents' and kept the document as a
        # string in 'markup'; reset() and _feed() rebuild the tree
        # (presumably by parsing that string again).
        self.reset()
        self._feed()
        return state
    @classmethod
    def _decode_markup(cls, markup):
@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
        self.open_tag_counter = Counter()
        self.preserve_whitespace_tag_stack = []
        self.string_container_stack = []
        self._most_recent_element = None
        self.pushTag(self)
    def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@ -749,7 +776,7 @@ class BeautifulSoup(Tag):
    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
        """Returns a string or Unicode representation of the parse tree
            as an HTML or XML document.
@ -776,7 +803,7 @@ class BeautifulSoup(Tag):
        else:
            indent_level = 0
        return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit
 from bs4.builder import (
    DetectsXMLParsedAsHTML,
    ParserRejectedMarkup,
    HTML,
    HTMLTreeBuilder,
    STRICT,
@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
        self._initialize_xml_detector()
    def error(self, message):
        """Reject the markup by raising ParserRejectedMarkup.

        :param message: Passed along as the exception's argument.
        :raise ParserRejectedMarkup: Always.
        """
        # NOTE: This method has to exist for as long as Python 3.9 is
        # supported. HTMLParser dropped the corresponding code in 3.5,
        # but ParserBase kept it until 3.10.
        # https://github.com/python/cpython/issues/76025
        #
        # An earlier implementation downgraded the error to a warning,
        # but in every case that was found, HTMLParser then crashed
        # right away with a message less helpful than the warning.
        # Raising here makes it clearer that html.parser simply can't
        # parse this markup. Python 3.10 behaves the same way, except
        # that it raises AssertionError instead of calling a method.
        # (That error is caught and wrapped in a ParserRejectedMarkup.)
        rejection = ParserRejectedMarkup(message)
        raise rejection
    def handle_startendtag(self, name, attrs):
        """Handle an incoming empty-element tag.
@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
-        parser.feed(markup)
+        try:
            parser.feed(markup)
        except AssertionError as e:
            # html.parser raises AssertionError in rare cases to
            # indicate a fatal problem with the markup, especially
            # when there's an error in the doctype declaration.
            raise ParserRejectedMarkup(e)
        parser.close()
        parser.already_closed_empty_element = []
--- a/lib/bs4/css.py
+++ b/lib/bs4/css.py
@ -0,0 +1,280 @@
 """Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
 import warnings
 try:
    import soupsieve
 except ImportError as e:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
 class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
        point.
        :param api: A plug-in replacement for the soupsieve module,
        designed mainly for use in tests.
        :raise NotImplementedError: If `api` is None, which is the
        default when the soupsieve package could not be imported.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape(). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: Whatever the API's escape() function returns.
        :raise NotImplementedError: If this object has no API to
        delegate to.
        """
        # Look at the API this object was built with rather than the
        # module-level soupsieve import: a replacement API handed to
        # the constructor must keep working even when the real
        # soupsieve package is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces supplied by the caller, or None.
        :param select: The selector the namespaces will be used with.
        :return: `ns`, or the tag's own namespaces when `ns` is None
        and `select` is not a precompiled pattern.
        """
        # A precompiled pattern already has a namespace context
        # compiled in, which cannot be replaced, so the tag's own
        # namespaces are only substituted for other kinds of selector.
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results to wrap.
        :return: A ResultSet containing `results`.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.compile() method.
        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select_one() method.
        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0
        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.iselect() method.
        :return: A generator
        :rtype: types.GeneratorType
        """
        # Accept limit=None the same way select() does, so the two
        # methods can be called interchangeably.
        if limit is None:
            limit = 0
        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.closest() method.
        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.match() method.
        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.filter() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@ -59,21 +59,6 @@ def diagnose(data):
    if hasattr(data, 'read'):
        data = data.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    else:
        try:
            if os.path.exists(data):
                print(('"%s" looks like a filename. Reading data from the file.' % data))
                with open(data) as fp:
                    data = fp.read()
        except ValueError:
            # This can happen on some platforms when the 'filename' is
            # too long. Assume it's data and not a filename.
            pass
        print("")
    for parser in basic_parsers:
        print(("Trying to parse your markup with %s" % parser))
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@ -8,14 +8,8 @@ except ImportError as e:
 import re
 import sys
 import warnings
 try:
    import soupsieve
 except ImportError as e:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
 from bs4.css import CSS
 from bs4.formatter import (
    Formatter,
    HTMLFormatter,
@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
    "string-escape",
    "string_escape",
 ])
-    
+
 class NamespacedAttribute(str):
    """A namespaced string (e.g. 'xml:lang') that remembers the namespace
    ('xml') and the name ('lang') that were used to create it.
    """
-    
+
    def __new__(cls, prefix, name=None, namespace=None):
        if not name:
            # This is the default namespace. Its name "has no value"
@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
            return match.group(1) + encoding
        return self.CHARSET_RE.sub(rewrite, self.original_value)
-    
+
 class PageElement(object):
    """Contains the navigational information for some part of the page:
    that is, its current location in the parse tree.
    NavigableString, Tag, etc. are all subclasses of PageElement.
    """
-   
+
    # In general, we can't tell just by looking at an element whether
    # it's contained in an XML document or an HTML document. But for
    # Tags (q.v.) we can store this information at parse time.
    known_xml = None
    def setup(self, parent=None, previous_element=None, next_element=None,
              previous_sibling=None, next_sibling=None):
        """Sets up the initial relations between this element and
@ -163,7 +162,7 @@ class PageElement(object):
        :param previous_element: The element parsed immediately before
            this one.
-        
+
        :param next_element: The element parsed immediately after
            this one.
@ -257,11 +256,11 @@ class PageElement(object):
    default = object()
    def _all_strings(self, strip=False, types=default):
        """Yield all strings of certain classes, possibly stripping them.
-        
+
        This is implemented differently in Tag and NavigableString.
        """
        raise NotImplementedError()
-   
+
    @property
    def stripped_strings(self):
        """Yield all strings in this PageElement, stripping them first.
@ -294,11 +293,11 @@ class PageElement(object):
                    strip, types=types)])
    getText = get_text
    text = property(get_text)
-    
+
    def replace_with(self, *args):
-        """Replace this PageElement with one or more PageElements, keeping the 
+        """Replace this PageElement with one or more PageElements, keeping the
        rest of the tree the same.
-        
+
        :param args: One or more PageElements.
        :return: `self`, no longer part of the tree.
        """
@ -410,7 +409,7 @@ class PageElement(object):
        This works the same way as `list.insert`.
        :param position: The numeric position that should be occupied
-           in `self.children` by the new PageElement. 
+           in `self.children` by the new PageElement.
        :param new_child: A PageElement.
        """
        if new_child is None:
@ -546,7 +545,7 @@ class PageElement(object):
                "Element has no parent, so 'after' has no meaning.")
        if any(x is self for x in args):
            raise ValueError("Can't insert an element after itself.")
-        
+
        offset = 0
        for successor in args:
            # Extract first so that the index won't be screwed up if they
@ -912,7 +911,7 @@ class PageElement(object):
        :rtype: bool
        """
        return getattr(self, '_decomposed', False) or False
-            
+   
    # Old non-property versions of the generators, for backwards
    # compatibility with BS3.
    def nextGenerator(self):
@ -936,16 +935,11 @@ class NavigableString(str, PageElement):
    When Beautiful Soup parses the markup <b>penguin</b>, it will
    create a NavigableString for the string "penguin".
-    """   
+    """
    PREFIX = ''
    SUFFIX = ''
    # We can't tell just by looking at a string whether it's contained
    # in an XML document or an HTML document.
    known_xml = None
    def __new__(cls, value):
        """Create a new NavigableString.
@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
        u.setup()
        return u
-    def __copy__(self):
+    def __deepcopy__(self, memo, recursive=False):
        """A copy of a NavigableString has the same contents and class
        as the original, but it is not connected to the parse tree.
        :param recursive: This parameter is ignored; it's only defined
           so that NavigableString.__deepcopy__ implements the same
           signature as Tag.__deepcopy__.
        """
        return type(self)(self)
    def __copy__(self):
        """Return a deep copy of this NavigableString.

        A shallow copy isn't possible, because only one PageElement
        can occupy a given place in a parse tree.
        """
        fresh_memo = {}
        return self.__deepcopy__(fresh_memo)
    def __getnewargs__(self):
        """Return a one-element tuple holding this string's value as a plain str."""
        plain_value = str(self)
        return (plain_value,)
@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString):
    as comments (the Comment class) and CDATA blocks (the CData
    class).
    """
-    
+
    PREFIX = ''
    SUFFIX = ''
-    
+
    def output_ready(self, formatter=None):
        """Make this string ready for output by adding any subclass-specific
            prefix or suffix.
@ -1144,7 +1148,7 @@ class Stylesheet(NavigableString):
    """
    pass
-    
+
 class Script(NavigableString):
    """A NavigableString representing an executable script (probably
    Javascript).
@ -1250,7 +1254,7 @@ class Tag(PageElement):
        if ((not builder or builder.store_line_numbers)
            and (sourceline is not None or sourcepos is not None)):
            self.sourceline = sourceline
-            self.sourcepos = sourcepos        
+            self.sourcepos = sourcepos
        if attrs is None:
            attrs = {}
        elif attrs:
@ -1308,13 +1312,49 @@ class Tag(PageElement):
                self.interesting_string_types = builder.string_containers[self.name]
            else:
                self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
-            
+
    parserClass = _alias("parser_class")  # BS3
-    def __copy__(self):
+    def __deepcopy__(self, memo, recursive=True):
-        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+        """A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
        Its contents are a copy of the old Tag's contents.
        """
        clone = self._clone()
        if recursive:
            # Clone this tag's descendants recursively, but without
            # making any recursive function calls.
            tag_stack = [clone]
            for event, element in self._event_stream(self.descendants):
                if event is Tag.END_ELEMENT_EVENT:
                    # Stop appending incoming Tags to the Tag that was
                    # just closed.
                    tag_stack.pop()
                else:
                    descendant_clone = element.__deepcopy__(
                        memo, recursive=False
                    )
                    # Add to its parent's .contents
                    tag_stack[-1].append(descendant_clone)
                    if event is Tag.START_ELEMENT_EVENT:
                        # Add the Tag itself to the stack so that its
                        # children will be .appended to it.
                        tag_stack.append(descendant_clone)
        return clone
    def __copy__(self):
        """Return a deep copy of this Tag.

        A shallow copy isn't possible, because a Tag's children can
        only have one parent at a time.
        """
        fresh_memo = {}
        return self.__deepcopy__(fresh_memo)
    def _clone(self):
        """Create a new Tag just like this one, but with no
        contents and unattached to any parse tree.
        This is the first step in the deepcopy process.
        """
        clone = type(self)(
            None, self.builder, self.name, self.namespace,
            self.prefix, self.attrs, is_xml=self._is_xml,
@ -1326,8 +1366,6 @@ class Tag(PageElement):
        )
        for attr in ('can_be_empty_element', 'hidden'):
            setattr(clone, attr, getattr(self, attr))
        for child in self.contents:
            clone.append(child.__copy__())
        return clone
    @property
@ -1433,7 +1471,7 @@ class Tag(PageElement):
            i.contents = []
            i._decomposed = True
            i = n
-           
+
    def clear(self, decompose=False):
        """Wipe out all children of this PageElement by calling extract()
           on them.
@ -1521,7 +1559,7 @@ class Tag(PageElement):
        if not isinstance(value, list):
            value = [value]
        return value
-    
+
    def has_attr(self, key):
        """Does this PageElement have an attribute with the given name?"""
        return key in self.attrs
@ -1608,7 +1646,7 @@ class Tag(PageElement):
    def __repr__(self, encoding="unicode-escape"):
        """Renders this PageElement as a string.
-        :param encoding: The encoding to use (Python 2 only). 
+        :param encoding: The encoding to use (Python 2 only).
            TODO: This is now ignored and a warning should be issued
            if a value is provided.
        :return: A (Unicode) string.
@ -1650,106 +1688,212 @@ class Tag(PageElement):
    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal",
-        """Render a Unicode representation of this PageElement and its
+               iterator=None):
-        contents.
+        pieces = []
        :param indent_level: Each line of the rendering will be
             indented this many spaces. Used internally in
             recursive calls while pretty-printing.
        :param eventual_encoding: The tag is destined to be
            encoded into this encoding. This method is _not_
            responsible for performing that encoding. This information
            is passed in so that it can be substituted in if the
            document contains a <META> tag that mentions the document's
            encoding.
        :param formatter: A Formatter object, or a string naming one of
            the standard formatters.
        """
        # First off, turn a non-Formatter `formatter` into a Formatter
        # object. This will stop the lookup from happening over and
        # over again.
        if not isinstance(formatter, Formatter):
            formatter = self.formatter_for_name(formatter)
-        attributes = formatter.attributes(self)
+
-        attrs = []
+        if indent_level is True:
-        for key, val in attributes:
+            indent_level = 0
-            if val is None:
+
-                decoded = key
+        # The currently active tag that put us into string literal
        # mode. Until this element is closed, children will be treated
        # as string literals and not pretty-printed. String literal
        # mode is turned on immediately after this tag begins, and
        # turned off immediately before it's closed. This means there
        # will be whitespace before and after the tag itself.
        string_literal_tag = None
        for event, element in self._event_stream(iterator):
            if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
                piece = element._format_tag(
                    eventual_encoding, formatter, opening=True
                )
            elif event is Tag.END_ELEMENT_EVENT:
                piece = element._format_tag(
                    eventual_encoding, formatter, opening=False
                )
                if indent_level is not None:
                    indent_level -= 1
            else:
-                if isinstance(val, list) or isinstance(val, tuple):
+                piece = element.output_ready(formatter)
                    val = ' '.join(val)
                elif not isinstance(val, str):
                    val = str(val)
                elif (
                        isinstance(val, AttributeValueWithCharsetSubstitution)
                        and eventual_encoding is not None
                ):
                    val = val.encode(eventual_encoding)
-                text = formatter.attribute_value(val)
+            # Now we need to apply the 'prettiness' -- extra
-                decoded = (
+            # whitespace before and/or after this tag. This can get
-                    str(key) + '='
+            # complicated because certain tags, like <pre> and
-                    + formatter.quoted_attribute_value(text))
+            # <script>, can't be prettified, since adding whitespace would
-            attrs.append(decoded)
+            # change the meaning of the content.
        close = ''
        closeTag = ''
            # The default behavior is to add whitespace before and
            # after an element when string literal mode is off, and to
            # leave things as they are when string literal mode is on.
            if string_literal_tag:
                indent_before = indent_after = False
            else:
                indent_before = indent_after = True
            # The only time the behavior is more complex than that is
            # when we encounter an opening or closing tag that might
            # put us into or out of string literal mode.
            if (event is Tag.START_ELEMENT_EVENT
                and not string_literal_tag
                and not element._should_pretty_print()):
                    # We are about to enter string literal mode. Add
                    # whitespace before this tag, but not after. We
                    # will stay in string literal mode until this tag
                    # is closed.
                    indent_before = True
                    indent_after = False
                    string_literal_tag = element
            elif (event is Tag.END_ELEMENT_EVENT
                  and element is string_literal_tag):
                # We are about to exit string literal mode by closing
                # the tag that sent us into that mode. Add whitespace
                # after this tag, but not before.
                indent_before = False
                indent_after = True
                string_literal_tag = None
            # Now we know whether to add whitespace before and/or
            # after this element.
            if indent_level is not None:
                if (indent_before or indent_after):
                    if isinstance(element, NavigableString):
                        piece = piece.strip()
                    if piece:
                        piece = self._indent_string(
                            piece, indent_level, formatter,
                            indent_before, indent_after
                        )
                if event == Tag.START_ELEMENT_EVENT:
                    indent_level += 1
            pieces.append(piece)
        return "".join(pieces)
    # Names for the different events yielded by _event_stream. Each
    # one is a distinct sentinel object.

    # Yielded for a Tag that is not an empty element; a matching
    # END_ELEMENT_EVENT is yielded for the same Tag later on.
    START_ELEMENT_EVENT = object()
    # Yielded when a previously started Tag is closed.
    END_ELEMENT_EVENT = object()
    # Yielded for a Tag whose is_empty_element is true; no
    # END_ELEMENT_EVENT follows it.
    EMPTY_ELEMENT_EVENT = object()
    # Yielded for any element that is not a Tag.
    STRING_ELEMENT_EVENT = object()
    def _event_stream(self, iterator=None):
        """Flatten this element's subtree into (event, element) pairs.

        Walking the stream is enough to rebuild the nested structure
        of the element (e.g. when formatting it as a string), so
        callers don't need recursive method calls.

        The idea is similar to the SAX API, but this is a simpler
        interface meant for internal use: the events are not SAX
        events, and each one carries a Tag or another Beautiful Soup
        object.

        :param iterator: An alternate iterator to use when traversing
         the tree. Defaults to self.self_and_descendants.
        :yield: 2-tuples of one of the Tag.*_EVENT constants and the
         element the event applies to.
        """
        open_tags = []
        if not iterator:
            iterator = self.self_and_descendants
        for node in iterator:
            # Any open tag that is not this node's parent must have
            # closed before the node appeared, so close it now.
            while open_tags and node.parent != open_tags[-1]:
                yield Tag.END_ELEMENT_EVENT, open_tags.pop()
            if not isinstance(node, Tag):
                yield Tag.STRING_ELEMENT_EVENT, node
            elif node.is_empty_element:
                yield Tag.EMPTY_ELEMENT_EVENT, node
            else:
                yield Tag.START_ELEMENT_EVENT, node
                # Keep the tag open until an element that isn't one
                # of its children shows up.
                open_tags.append(node)
        # The iterator is exhausted; close whatever is still open,
        # innermost tag first.
        while open_tags:
            yield Tag.END_ELEMENT_EVENT, open_tags.pop()
    def _indent_string(self, s, indent_level, formatter,
                       indent_before, indent_after):
        """Wrap a string in the whitespace needed for pretty-printing.

        :param s: The string to amend with whitespace.
        :param indent_level: How many copies of the formatter's
           indent string go in front of `s`.
        :param formatter: The object whose `indent` attribute is the
           string used for one level of indentation.
        :param indent_before: Whether or not to add indentation
           before the string.
        :param indent_after: Whether or not to add a newline after
           the string.
        :return: `s` with the requested whitespace attached.
        """
        # A level of 0 (or None) means there is nothing to put in front.
        leading = (
            formatter.indent * indent_level
            if indent_before and indent_level
            else ''
        )
        trailing = "\n" if indent_after else ''
        return leading + s + trailing
    def _format_tag(self, eventual_encoding, formatter, opening):
        # A tag starts with the < character (see below).
        # Then the / character, if this is a closing tag.
        closing_slash = ''
        if not opening:
            closing_slash = '/'
        # Then an optional namespace prefix.
        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"
-        if self.is_empty_element:
+        # Then a list of attribute values, if this is an opening tag.
-            close = formatter.void_element_close_prefix or ''
+        attribute_string = ''
-        else:
+        if opening:
-            closeTag = '</%s%s>' % (prefix, self.name)
+            attributes = formatter.attributes(self)
            attrs = []
            for key, val in attributes:
                if val is None:
                    decoded = key
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
                    elif not isinstance(val, str):
                        val = str(val)
                    elif (
                            isinstance(val, AttributeValueWithCharsetSubstitution)
                            and eventual_encoding is not None
                    ):
                        val = val.encode(eventual_encoding)
-        pretty_print = self._should_pretty_print(indent_level)
+                    text = formatter.attribute_value(val)
-        space = ''
+                    decoded = (
-        indent_space = ''
+                        str(key) + '='
-        if indent_level is not None:
+                        + formatter.quoted_attribute_value(text))
-            indent_space = (formatter.indent * (indent_level - 1))
+                attrs.append(decoded)
        if pretty_print:
            space = indent_space
            indent_contents = indent_level + 1
        else:
            indent_contents = None
        contents = self.decode_contents(
            indent_contents, eventual_encoding, formatter
        )
        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if indent_level is not None:
                # Even if this particular tag is not pretty-printed,
                # we should indent up to the start of the tag.
                s.append(indent_space)
            s.append('<%s%s%s%s>' % (
                    prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if indent_level is not None and closeTag and self.next_sibling:
                # Even if this particular tag is not pretty-printed,
                # we're now done with the tag, and we should add a
                # newline if appropriate.
                s.append("\n")
            s = ''.join(s)
        return s
-    def _should_pretty_print(self, indent_level):
+        # Then an optional closing slash (for a void element in an
        # XML document).
        void_element_closing_slash = ''
        if self.is_empty_element:
            void_element_closing_slash = formatter.void_element_close_prefix or ''
        # Put it all together.
        return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
    def _should_pretty_print(self, indent_level=1):
        """Should this tag be pretty-printed?
        Most of them should, but some (such as <pre> in HTML
@ -1770,7 +1914,7 @@ class Tag(PageElement):
            a Unicode string will be returned.
        :param formatter: A Formatter object, or a string naming one of
            the standard formatters.
-        :return: A Unicode string (if encoding==None) or a bytestring 
+        :return: A Unicode string (if encoding==None) or a bytestring
            (otherwise).
        """
        if encoding is None:
@ -1800,33 +1944,9 @@ class Tag(PageElement):
            the standard Formatters.
        """
-        # First off, turn a string formatter into a Formatter object. This
+        return self.decode(indent_level, eventual_encoding, formatter,
-        # will stop the lookup from happening over and over again.
+                           iterator=self.descendants)
        if not isinstance(formatter, Formatter):
            formatter = self.formatter_for_name(formatter)
        pretty_print = (indent_level is not None)
        s = []
        for c in self:
            text = None
            if isinstance(c, NavigableString):
                text = c.output_ready(formatter)
            elif isinstance(c, Tag):
                s.append(c.decode(indent_level, eventual_encoding,
                                  formatter))
            preserve_whitespace = (
                self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
            )
            if text and indent_level and not preserve_whitespace:
                text = text.strip()
            if text:
                if pretty_print and not preserve_whitespace:
                    s.append(formatter.indent * (indent_level - 1))
                s.append(text)
                if pretty_print and not preserve_whitespace:
                    s.append("\n")
        return ''.join(s)
    def encode_contents(
        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
        formatter="minimal"):
@ -1922,6 +2042,18 @@ class Tag(PageElement):
        # return iter() to make the purpose of the method clear
        return iter(self.contents)  # XXX This seems to be untested.
    @property
    def self_and_descendants(self):
        """Iterate over this PageElement and then over everything
        yielded by its `descendants`.

        A hidden element leaves itself out and yields only its
        descendants.

        :yield: A sequence of PageElements.
        """
        if not self.hidden:
            yield self
        yield from self.descendants
    @property
    def descendants(self):
        """Iterate over all children of this PageElement in a
@ -1948,16 +2080,13 @@ class Tag(PageElement):
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.
-        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           soupsieve.select() method.
        :return: A Tag.
        :rtype: bs4.element.Tag
        """
-        value = self.select(selector, namespaces, 1, **kwargs)
+        return self.css.select_one(selector, namespaces, **kwargs)
        if value:
            return value[0]
        return None
    def select(self, selector, namespaces=None, limit=None, **kwargs):
        """Perform a CSS selection operation on the current element.
@ -1973,27 +2102,18 @@ class Tag(PageElement):
        :param limit: After finding this number of results, stop looking.
-        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select() method.
        :return: A ResultSet of Tags.
        :rtype: bs4.element.ResultSet
        """
-        if namespaces is None:
+        return self.css.select(selector, namespaces, limit, **kwargs)
            namespaces = self._namespaces
        if limit is None:
            limit = 0
        if soupsieve is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
-        # We do this because it's more consistent and because
+    @property
-        # ResultSet.__getattr__ has a helpful error message.
+    def css(self):
-        return ResultSet(None, results)
+        """Return an interface to the CSS selector API."""
        return CSS(self)
    # Old names for backwards compatibility
    def childGenerator(self):
@ -2038,7 +2158,7 @@ class SoupStrainer(object):
        :param attrs: A dictionary of filters on attribute values.
        :param string: A filter for a NavigableString with specific text.
        :kwargs: A dictionary of filters on attribute values.
-        """        
+        """
        if string is None and 'text' in kwargs:
            string = kwargs.pop('text')
            warnings.warn(
@ -2137,7 +2257,7 @@ class SoupStrainer(object):
            # looking at a tag with a different name.
            if markup and not markup.prefix and self.name != markup.name:
                 return False
-            
+
        call_function_with_tag_data = (
            isinstance(self.name, Callable)
            and not isinstance(markup_name, Tag))
@ -2223,7 +2343,7 @@ class SoupStrainer(object):
            if self._matches(' '.join(markup), match_against):
                return True
            return False
-        
+
        if match_against is True:
            # True matches any non-None value.
            return markup is not None
@ -2267,11 +2387,11 @@ class SoupStrainer(object):
                        return True
            else:
                return False
-        
+
        # Beyond this point we might need to run the test twice: once against
        # the tag's name and once against its prefixed name.
        match = False
-        
+
        if not match and isinstance(match_against, str):
            # Exact string match
            match = markup == match_against
--- a/lib/bs4/formatter.py
+++ b/lib/bs4/formatter.py
@ -97,7 +97,7 @@ class Formatter(EntitySubstitution):
        else:
            indent = ' '
        self.indent = indent
-        
+
    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
--- a/lib/bs4/tests/init.py
+++ b/lib/bs4/tests/init.py
@ -297,37 +297,11 @@ class TreeBuilderSmokeTest(object):
            markup, multi_valued_attributes=multi_valued_attributes
        )
        assert soup.a['class'] == ['a', 'b', 'c']
    def test_fuzzed_input(self):
        # This test centralizes in one place the various fuzz tests
        # for Beautiful Soup created by the oss-fuzz project.
        # These strings superficially resemble markup, but they
        # generally can't be parsed into anything. The best we can
        # hope for is that parsing these strings won't crash the
        # parser.
        #
        # n.b. This markup is commented out because these fuzz tests
        # _do_ crash the parser. However the crashes are due to bugs
        # in html.parser, not Beautiful Soup -- otherwise I'd fix the
        # bugs!
        bad_markup = [
            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
            # https://bugs.python.org/issue37747
            #
            #b'\n<![\xff\xfe\xfe\xcd\x00',
-            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
+    def test_invalid_doctype(self):
-            # https://bugs.python.org/issue34480
+        markup = '<![if word]>content<![endif]>'
-            #
+        markup = '<!DOCTYPE html]ff>'
-            #b'<![n\x00'
+        soup = self.soup(markup)
        ]
        for markup in bad_markup:
            with warnings.catch_warnings(record=False):
                soup = self.soup(markup)
 class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
@ -577,8 +551,8 @@ Hello, world!
        """Whitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        """
-        pre_markup = "<pre>   </pre>"
+        pre_markup = "<pre>a   z</pre>\n"
-        textarea_markup = "<textarea> woo\nwoo  </textarea>"
+        textarea_markup = "<textarea> woo\nwoo  </textarea>\n"
        self.assert_soup(pre_markup)
        self.assert_soup(textarea_markup)
@ -589,7 +563,7 @@ Hello, world!
        assert soup.textarea.prettify() == textarea_markup
        soup = self.soup("<textarea></textarea>")
-        assert soup.textarea.prettify() == "<textarea></textarea>"
+        assert soup.textarea.prettify() == "<textarea></textarea>\n"
    def test_nested_inline_elements(self):
        """Inline elements can be nested indefinitely."""
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320.testcase
@ -0,0 +1 @@
 ˙<!DOCTyPEV PUBLIC'''Đ'
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456.testcase
@ -0,0 +1 @@
 )<a><math><TR><a><mI><a><p><a>
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632.testcase
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912.testcase
@ -0,0 +1,2 @@
 <![ 
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896.testcase
@ -0,0 +1 @@
 -<math><sElect><mi><sElect><sElect>
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440.testcase
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464.testcase
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224.testcase
@ -0,0 +1 @@
 ñ<table><svg><html>
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400.testcase
--- a/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744.testcase
+++ b/lib/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744.testcase
--- a/lib/bs4/tests/fuzz/crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08.testcase
+++ b/lib/bs4/tests/fuzz/crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08.testcase
--- a/lib/bs4/tests/test_css.py
+++ b/lib/bs4/tests/test_css.py
@ -0,0 +1,487 @@
 import pytest
 import types
 from unittest.mock import MagicMock
 from bs4 import (
    CSS,
    BeautifulSoup,
    ResultSet,
 )
 from . import (
    SoupTest,
    SOUP_SIEVE_PRESENT,
 )
 if SOUP_SIEVE_PRESENT:
    from soupsieve import SelectorSyntaxError
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
 class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.
    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.
    """
    HTML = """
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 "http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <head>
 <title>The title</title>
 <link rel="stylesheet" href="blah.css" type="text/css" id="l1">
 </head>
 <body>
 <custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
 <div id="main" class="fancy">
 <div id="inner">
 <h1 id="header1">An H1</h1>
 <p>Some text</p>
 <p class="onep" id="p1">Some more text</p>
 <h2 id="header2">An H2</h2>
 <p class="class1 class2 class3" id="pmulti">Another</p>
 <a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
 <h2 id="header3">Another H2</h2>
 <a id="me" href="http://simonwillison.net/" rel="me">me</a>
 <span class="s1">
 <a href="#" id="s1a1">span1a1</a>
 <a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
 <span class="span2">
 <a href="#" id="s2a1">span2a1</a>
 </span>
 <span class="span3"></span>
 <custom-dashed-tag class="dashed" id="dash2"/>
 <div data-tag="dashedvalue" id="data1"/>
 </span>
 </div>
 <x id="xid">
 <z id="zida"/>
 <z id="zidab"/>
 <z id="zidac"/>
 </x>
 <y id="yid">
 <z id="zidb"/>
 </y>
 <p lang="en" id="lang-en">English</p>
 <p lang="en-gb" id="lang-en-gb">English UK</p>
 <p lang="en-us" id="lang-en-us">English US</p>
 <p lang="fr" id="lang-fr">French</p>
 </div>
 <div id="footer">
 </div>
 """
    def setup_method(self):
        """Parse the shared HTML fixture with html.parser and keep the
        result in self.soup."""
        document = self.HTML
        self.soup = BeautifulSoup(document, 'html.parser')
    def assert_selects(self, selector, expected_ids, **kwargs):
        """Assert that a selector matches exactly the elements with the
        given IDs.

        :param selector: The CSS selector handed to self.soup.select().
        :param expected_ids: The `id` values of the elements that
            should be selected, in any order. The argument is not
            modified.
        :param kwargs: Extra keyword arguments passed through to
            select().
        """
        results = self.soup.select(selector, **kwargs)
        assert isinstance(results, ResultSet)
        # Compare sorted copies so that ordering doesn't matter and the
        # caller's list is left untouched.
        found_ids = sorted(el['id'] for el in results)
        wanted_ids = sorted(expected_ids)
        assert wanted_ids == found_ids, "Selector %s, expected [%s], got [%s]" % (
                selector, ', '.join(wanted_ids), ', '.join(found_ids)
        )
    # camelCase alias of assert_selects; both names refer to the same method.
    assertSelect = assert_selects
    def assert_select_multiple(self, *tests):
        """Run assert_selects once for every (selector, expected_ids)
        pair given."""
        for case in tests:
            selector, expected_ids = case
            self.assert_selects(selector, expected_ids)
    def test_precompiled(self):
        sel = self.soup.css.compile('div')
        els = self.soup.select(sel)
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'
        el = self.soup.select_one(sel)
        assert 'main' == el['id']
    def test_one_tag_one(self):
        els = self.soup.select('title')
        assert len(els) == 1
        assert els[0].name == 'title'
        assert els[0].contents == ['The title']
    def test_one_tag_many(self):
        els = self.soup.select('div')
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'
        el = self.soup.select_one('div')
        assert 'main' == el['id']
    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        assert None == match
    def test_tag_in_tag_one(self):
        els = self.soup.select('div div')
        self.assert_selects('div div', ['inner', 'data1'])
    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
    def test_limit(self):
        self.assert_selects('html div', ['main'], limit=1)
        self.assert_selects('html body div', ['inner', 'main'], limit=2)
        self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
                           limit=10)
    def test_tag_no_match(self):
        assert len(self.soup.select('del')) == 0
    def test_invalid_tag(self):
        """A malformed selector makes select() raise SelectorSyntaxError."""
        bad_selector = 'tag%t'
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(bad_selector)
    def test_select_dashed_tag_ids(self):
        self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])
    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        assert dashed[0].name == 'custom-dashed-tag'
        assert dashed[0]['id'] == 'dash2'
    def test_dashed_tag_text(self):
        assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'
    def test_select_dashed_matches_find_all(self):
        assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')
    def test_header_tags(self):
        self.assert_select_multiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )
    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            assert len(els) == 1
            assert els[0].name == 'p'
            assert els[0]['class'] == ['onep']
    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        assert len(els) == 0
    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assert_selects(selector, ['inner'])
    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        assert len(els) == 0
    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        assert len(els) == 3
        for el in els:
            assert el.name == 'p'
        assert els[1]['class'] == ['onep']
        assert not els[0].has_attr('class')
    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            assert len(self.soup.select(selector)) == 0
    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
            '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assert_selects(selector, ['pmulti'])
    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assert_selects(selector, ['pmulti'])
    def test_child_selector(self):
        self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
        self.assert_selects('.s1 > a span', ['s1a2s1'])
    def test_child_selector_id(self):
        self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])
    def test_attribute_equals(self):
        self.assert_select_multiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )
    def test_attribute_tilde(self):
        self.assert_select_multiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )
    def test_attribute_startswith(self):
        """Check [attr^="prefix"] selectors against the ids they should select."""
        # Selector / expected-ids pairs; [] means nothing may be selected.
        cases = [
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1']),
        ]
        self.assert_select_multiple(*cases)
    def test_attribute_endswith(self):
        """Check [attr$="suffix"] selectors against the ids they should select."""
        # Selector / expected-ids pairs; [] means nothing may be selected.
        cases = [
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        ]
        self.assert_select_multiple(*cases)
    def test_attribute_contains(self):
        """Check [attr*="substring"] selectors against the ids they should select."""
        # Cases adapted from test_attribute_startswith.
        prefix_cases = [
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
        ]
        # Cases adapted from test_attribute_endswith.
        suffix_cases = [
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
        ]
        # Cases that only make sense for a substring match.
        substring_cases = [
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1']),
        ]
        self.assert_select_multiple(
            *prefix_cases, *suffix_cases, *substring_cases
        )
    def test_attribute_exact_or_hypen(self):
        """Check [attr|="value"] selectors against the ids they should select."""
        english = ['lang-en', 'lang-en-gb', 'lang-en-us']
        cases = (
            ('p[lang|="en"]', list(english)),
            ('[lang|="en"]', list(english)),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )
        self.assert_select_multiple(*cases)
    def test_attribute_exists(self):
        """Check bare [attr] selectors against the ids they should select."""
        # Selector / expected-ids pairs; [] means nothing may be selected.
        cases = [
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1']),
        ]
        self.assert_select_multiple(*cases)
    def test_quoted_space_in_selector_name(self):
        """A quoted attribute value containing a space selects the right tag."""
        markup = """<div style="display: wrong">nope</div>
        <div style="display: right">yes</div>
        """
        matches = BeautifulSoup(markup, 'html.parser').select(
            'div[style="display: right"]'
        )
        # Exactly one tag must have been selected.
        [chosen] = matches
        assert "yes" == chosen.string
    def test_unsupported_pseudoclass(self):
        """Unknown pseudo-classes and bad nth-of-type arguments are rejected."""
        unknown_pseudoclass = "a:no-such-pseudoclass"
        with pytest.raises(NotImplementedError):
            self.soup.select(unknown_pseudoclass)
        bad_nth_argument = "a:nth-of-type(a)"
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(bad_nth_argument)
    def test_nth_of_type(self):
        """:nth-of-type(n) selects the n-th paragraph, or nothing."""
        # The first and third paragraphs exist and have known text.
        for position, text in ((1, 'Some text'), (3, 'Another')):
            matches = self.soup.select('div#inner p:nth-of-type(%d)' % position)
            assert len(matches) == 1
            assert matches[0].string == text
        # There is no fourth paragraph.
        missing = self.soup.select('div#inner p:nth-of-type(4)')
        assert len(missing) == 0
        # Zero will select no tags.
        zeroth = self.soup.select('div p:nth-of-type(0)')
        assert len(zeroth) == 0
    def test_nth_of_type_direct_descendant(self):
        """:nth-of-type works after a child combinator."""
        matches = self.soup.select('div#inner > p:nth-of-type(1)')
        assert len(matches) == 1
        first_paragraph = matches[0]
        assert first_paragraph.string == 'Some text'
    def test_id_child_selector_nth_of_type(self):
        """An id selector, a child combinator and :nth-of-type combine."""
        selector = '#inner > p:nth-of-type(2)'
        self.assert_selects(selector, ['p1'])
    def test_select_on_element(self):
        """select() can be called on a tag inside the tree, not just the soup."""
        # The other tests start from the whole tree; this one starts
        # from the <div id="main"> element within it.
        main_div = self.soup.find("div", id="main")
        nested_divs = main_div.select("div")
        # <div id="inner"> must be among the results and
        # <div id="footer"> must not.
        self.assert_selects_ids(nested_divs, ['inner', 'data1'])
    def test_overspecified_child_id(self):
        """An id selector still honours the class selector in front of it."""
        cases = (
            (".fancy #inner", ['inner']),
            (".normal #inner", []),
        )
        for selector, expected_ids in cases:
            self.assert_selects(selector, expected_ids)
    def test_adjacent_sibling_selector(self):
        """Check the + combinator, alone and chained."""
        cases = (
            ('#p1 + h2', ['header2']),
            ('#p1 + h2 + p', ['pmulti']),
            ('#p1 + #header2 + .class1', ['pmulti']),
        )
        for selector, expected_ids in cases:
            self.assert_selects(selector, expected_ids)
        # This selector must select nothing at all.
        no_match = self.soup.select('#p1 + p')
        assert [] == no_match
    def test_general_sibling_selector(self):
        """Check the ~ combinator, alone and mixed with +."""
        cases = (
            ('#p1 ~ h2', ['header2', 'header3']),
            ('#p1 ~ #header2', ['header2']),
            ('#p1 ~ h2 + a', ['me']),
            ('#p1 ~ h2 + [rel="me"]', ['me']),
        )
        for selector, expected_ids in cases:
            self.assert_selects(selector, expected_ids)
        # This selector must select nothing at all.
        no_match = self.soup.select('#inner ~ h2')
        assert [] == no_match
    def test_dangling_combinator(self):
        """A selector that ends in a combinator is a syntax error."""
        dangling = 'h1 >'
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(dangling)
    def test_sibling_combinator_wont_select_same_tag_twice(self):
        """Each tag matched through ~ appears only once in the results."""
        expected_ids = ['lang-en-gb', 'lang-en-us', 'lang-fr']
        self.assert_selects('p[lang] ~ p', expected_ids)
    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        """A comma-separated group selects the matches of both selectors."""
        selector = 'x, y'
        self.assert_selects(selector, ['xid', 'yid'])
    def test_multiple_select_with_no_space(self):
        """The grouping comma needs no whitespace after it."""
        selector = 'x,y'
        self.assert_selects(selector, ['xid', 'yid'])
    def test_multiple_select_with_more_space(self):
        """Extra whitespace after the grouping comma is ignored."""
        selector = 'x,    y'
        self.assert_selects(selector, ['xid', 'yid'])
    def test_multiple_select_duplicated(self):
        """Repeating a selector in a group does not repeat its match."""
        selector = 'x, x'
        self.assert_selects(selector, ['xid'])
    def test_multiple_select_sibling(self):
        """A group may mix a plain tag with a sibling-combinator selector."""
        selector = 'x, y ~ p[lang=fr]'
        self.assert_selects(selector, ['xid', 'lang-fr'])
    def test_multiple_select_tag_and_direct_descendant(self):
        """A group may mix a plain tag with a child-combinator selector."""
        selector = 'x, y > z'
        self.assert_selects(selector, ['xid', 'zidb'])
    def test_multiple_select_direct_descendant_and_tags(self):
        """A child-combinator selector can be grouped with plain tags."""
        expected_ids = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
        self.assert_selects('div > x, y, z', expected_ids)
    def test_multiple_select_indirect_descendant(self):
        """A descendant selector can be grouped with plain tags."""
        expected_ids = ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']
        self.assert_selects('div x,y,  z', expected_ids)
    def test_invalid_multiple_select(self):
        """A leading or doubled grouping comma is a syntax error."""
        leading_comma = ',x, y'
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(leading_comma)
        doubled_comma = 'x,,y'
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(doubled_comma)
    def test_multiple_select_attrs(self):
        """Attribute selectors can be grouped with a comma."""
        selector = 'p[lang=en], p[lang=en-gb]'
        self.assert_selects(selector, ['lang-en', 'lang-en-gb'])
    def test_multiple_select_ids(self):
        """Grouped selectors filtering on the id attribute select the expected tags."""
        selector = 'x, y > z[id=zida], z[id=zidab], z[id=zidb]'
        self.assert_selects(selector, ['xid', 'zidb', 'zidab'])
    def test_multiple_select_nested(self):
        """A chain of child combinators can be grouped with another selector."""
        selector = 'body > div > x, y > z'
        self.assert_selects(selector, ['xid', 'zidb'])
    def test_select_duplicate_elements(self):
        """A grouped selector finds every matching tag, duplicates included."""
        # The markup contains the same <div class="c1"/> element twice.
        soup = BeautifulSoup(
            '<div class="c1"/><div class="c2"/><div class="c1"/>', 'html.parser'
        )
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)
        # find_all must agree on which elements match, though because
        # of an implementation detail it returns them in a different
        # order.
        found = soup.find_all(class_=['c1', 'c2'])
        for tag in found:
            assert tag in selected
    def test_closest(self):
        """css.closest() from div#inner with a div#main selector finds div#main."""
        expected = self.soup.find("div", id="main")
        start = self.soup.find("div", id="inner")
        assert start.css.closest("div[id=main]") == expected
    def test_match(self):
        """css.match() reports whether the tag itself matches the selector."""
        selector = "div[id=main]"
        inner_div = self.soup.find("div", id="inner")
        main_div = self.soup.find("div", id="main")
        # Only the tag whose id is "main" matches.
        assert inner_div.css.match(selector) == False
        assert main_div.css.match(selector) == True
    def test_iselect(self):
        """css.iselect() returns a generator over the selected tags."""
        matches = self.soup.css.iselect("h2")
        assert isinstance(matches, types.GeneratorType)
        # Unpacking consumes the generator and checks it yields two tags.
        first, second = matches
        assert first['id'] == 'header2'
        assert second['id'] == 'header3'
    def test_filter(self):
        """css.filter() returns a ResultSet of the tags matching the selector."""
        inner = self.soup.find("div", id="inner")
        # Check the result that was just computed instead of discarding
        # it and running the identical filter a second time.
        results = inner.css.filter("h2")
        assert len(results) == 2
        results = inner.css.filter("h2[id=header3]")
        assert isinstance(results, ResultSet)
        # Exactly one tag must survive the narrower selector.
        [result] = results
        assert result['id'] == 'header3'
    def test_escape(self):
        """css.escape() backslash-escapes the characters checked here."""
        escape = self.soup.css.escape
        assert escape(".foo#bar") == '\\.foo\\#bar'
        assert escape("()[]{}") == '\\(\\)\\[\\]\\{\\}'
        # The saved bound method and a fresh attribute lookup must agree.
        assert escape(".foo") == self.soup.css.escape(".foo")
--- a/lib/bs4/tests/test_formatter.py
+++ b/lib/bs4/tests/test_formatter.py
@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
    @pytest.mark.parametrize(
        "indent,expect",
        [
-            (None, '<a>\n<b>\ntext\n</b>\n</a>'),
+            (None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
-            (-1, '<a>\n<b>\ntext\n</b>\n</a>'),
+            (-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
-            (0, '<a>\n<b>\ntext\n</b>\n</a>'),
+            (0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
-            ("", '<a>\n<b>\ntext\n</b>\n</a>'),
+            ("", '<a>\n<b>\ntext\n</b>\n</a>\n'),
-            (1, '<a>\n <b>\n  text\n </b>\n</a>'),
+            (1, '<a>\n <b>\n  text\n </b>\n</a>\n'),
-            (2, '<a>\n  <b>\n    text\n  </b>\n</a>'),
+            (2, '<a>\n  <b>\n    text\n  </b>\n</a>\n'),
-            ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
+            ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
-            ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
+            ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),
            # Some invalid inputs -- the default behavior is used.
-            (object(), '<a>\n <b>\n  text\n </b>\n</a>'),
+            (object(), '<a>\n <b>\n  text\n </b>\n</a>\n'),
-            (b'bytes', '<a>\n <b>\n  text\n </b>\n</a>'),
+            (b'bytes', '<a>\n <b>\n  text\n </b>\n</a>\n'),
        ]
    )
    def test_indent(self, indent, expect):
--- a/lib/bs4/tests/test_fuzz.py
+++ b/lib/bs4/tests/test_fuzz.py
@ -0,0 +1,91 @@
 """This file contains test cases reported by third parties using
 fuzzing tools, primarily from Google's oss-fuzz project. Some of these
 represent real problems with Beautiful Soup, but many are problems in
 libraries that Beautiful Soup depends on, and many of the test cases
 represent different ways of triggering the same problem.
 Grouping these test cases together makes it easy to see which test
 cases represent the same problem, and puts the test cases in close
 proximity to code that can trigger the problems.
 """
 import os
 import pytest
 from bs4 import (
    BeautifulSoup,
    ParserRejectedMarkup,
 )
 class TestFuzz(object):
    # Test case markup files from fuzzers are given this extension so
    # they can be included in builds.
    TESTCASE_SUFFIX = ".testcase"
    # This class of error has been fixed by catching a less helpful
    # exception from html.parser and raising ParserRejectedMarkup
    # instead.
    @pytest.mark.parametrize(
        "filename",
        ["clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912"],
    )
    def test_rejected_markup(self, filename):
        """Parsing this test case with html.parser raises ParserRejectedMarkup."""
        testcase = self.__markup(filename)
        with pytest.raises(ParserRejectedMarkup):
            BeautifulSoup(testcase, 'html.parser')
    # This class of error has to do with very deeply nested documents
    # which overflow the Python call stack when the tree is converted
    # to a string. This is an issue with Beautiful Soup which was fixed
    # as part of [bug=1471755].
    @pytest.mark.parametrize(
        "filename",
        [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
        ],
    )
    def test_deeply_nested_document(self, filename):
        """A deeply nested test case can be parsed and encoded again."""
        # There is no explicit assertion: getting through the parse and
        # the encode without an error is enough to demonstrate that the
        # overflow problem has been fixed.
        soup = BeautifulSoup(self.__markup(filename), 'html.parser')
        soup.encode()
    # This class of error represents problems with html5lib's parser,
    # not Beautiful Soup. I use
    # https://github.com/html5lib/html5lib-python/issues/568 to notify
    # the html5lib developers of these issues.
    @pytest.mark.skip("html5lib problems")
    @pytest.mark.parametrize(
        "filename", [
            # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",
            # b')<a><math><TR><a><mI><a><p><a>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",
            # b'-<math><sElect><mi><sElect><sElect>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
            # b'ñ<table><svg><html>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
            # <TABLE>, some ^@ characters, some <math> tags.
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",
            # Nested table
            "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
        ]
    )
    def test_html5lib_parse_errors(self, filename):
        """Parse a fuzzer test case with html5lib and print the encoded tree.

        The test is skipped unconditionally by the ``pytest.mark.skip``
        decorator; the comment above each filename summarises the markup
        stored in that test case.
        """
        markup = self.__markup(filename)
        print(BeautifulSoup(markup, 'html5lib').encode())
    def __markup(self, filename):
        """Return the raw bytes of a test case file in the ``fuzz`` directory.

        :param filename: Name of the test case file, with or without
            the ``TESTCASE_SUFFIX`` extension.
        :return: The contents of the file as a bytestring.
        """
        if not filename.endswith(self.TESTCASE_SUFFIX):
            filename += self.TESTCASE_SUFFIX
        # Test cases live in a 'fuzz' directory next to this module.
        this_dir = os.path.split(__file__)[0]
        path = os.path.join(this_dir, 'fuzz', filename)
        # Read through a context manager so the file handle is closed
        # as soon as the data is in memory, rather than whenever the
        # garbage collector gets to it.
        with open(path, 'rb') as testcase:
            return testcase.read()
--- a/lib/bs4/tests/test_htmlparser.py
+++ b/lib/bs4/tests/test_htmlparser.py
@ -3,9 +3,11 @@ trees."""
 from pdb import set_trace
 import pickle
 import pytest
 import warnings
 from bs4.builder import (
    HTMLParserTreeBuilder,
    ParserRejectedMarkup,
    XMLParsedAsHTMLWarning,
 )
 from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@ -15,6 +17,28 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
    default_builder = HTMLParserTreeBuilder
    def test_rejected_input(self):
        """Markup that html.parser rejects is reported as ParserRejectedMarkup."""
        # Python's html.parser will occasionally reject markup,
        # especially when there is a problem with the initial DOCTYPE
        # declaration. Different versions of Python sound the alarm in
        # different ways, but Beautiful Soup consistently raises
        # errors as ParserRejectedMarkup exceptions.
        bad_markup = [
            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
            # https://github.com/python/cpython/issues/81928
            b'\n<![\xff\xfe\xfe\xcd\x00',
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
            # https://github.com/python/cpython/issues/78661
            #
            b'<![n\x00',
            b"<![UNKNOWN[]]>",
        ]
        for markup in bad_markup:
            with pytest.raises(ParserRejectedMarkup):
                # Only the exception matters here, so the parse result
                # is not bound to a name.
                self.soup(markup)
    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        # The body is intentionally empty.
        # NOTE(review): presumably this overrides a same-named test
        # inherited from HTMLTreeBuilderSmokeTest -- confirm against the
        # base class.
        pass
--- a/lib/bs4/tests/test_lxml.py
+++ b/lib/bs4/tests/test_lxml.py
@ -189,13 +189,15 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
        assert soup.find('prefix:tag3').name == 'tag3'
        assert soup.subtag.find('prefix:tag3').name == 'tag3'
-    def test_pickle_removes_builder(self):
+    def test_pickle_restores_builder(self):
-        # The lxml TreeBuilder is not picklable, so it won't be
+        # The lxml TreeBuilder is not picklable, so when unpickling
-        # preserved in a pickle/unpickle operation.
+        # a document created with it, a new TreeBuilder of the
-
+        # appropriate class is created.
        soup = self.soup("<a>some markup</a>")
        assert isinstance(soup.builder, self.default_builder)
        pickled = pickle.dumps(soup)
        unpickled = pickle.loads(pickled)
        assert "some markup" == unpickled.a.string
-        assert unpickled.builder is None
+        assert unpickled.builder != soup.builder
        assert isinstance(unpickled.builder, self.default_builder)
--- a/lib/bs4/tests/test_pageelement.py
+++ b/lib/bs4/tests/test_pageelement.py
@ -2,20 +2,18 @@
 import copy
 import pickle
 import pytest
 import sys
 from bs4 import BeautifulSoup
 from bs4.element import (
    Comment,
    ResultSet,
    SoupStrainer,
 )
 from . import (
    SoupTest,
    SOUP_SIEVE_PRESENT,
 )
 if SOUP_SIEVE_PRESENT:
    from soupsieve import SelectorSyntaxError
 class TestEncoding(SoupTest):
    """Test the ability to encode objects into strings."""
@ -51,10 +49,21 @@ class TestEncoding(SoupTest):
        assert "\N{SNOWMAN}".encode("utf8") == soup.b.encode_contents(
            encoding="utf8"
        )
    def test_encode_deeply_nested_document(self):
        """Encoding copes with nesting deeper than the recursion limit."""
        # Nest one level deeper than the interpreter allows recursive
        # calls to go. If encoding involved recursive function calls,
        # this would overflow the Python interpreter stack.
        depth = sys.getrecursionlimit() + 1
        soup = self.soup("<span>" * depth)
        as_bytes = soup.encode()
        assert depth == as_bytes.count(b"<span>")
    def test_deprecated_renderContents(self):
        """renderContents() on a tag returns its contents encoded as UTF-8."""
        soup = self.soup("<b>\N{SNOWMAN}</b>")
        # Calling it on the whole document must simply not fail.
        soup.renderContents()
        snowman_bytes = "\N{SNOWMAN}".encode("utf8")
        assert snowman_bytes == soup.b.renderContents()
    def test_repr(self):
@ -159,7 +168,31 @@ class TestFormatters(SoupTest):
        soup = self.soup("<div>  foo  <pre>  \tbar\n  \n  </pre>  baz  <textarea> eee\nfff\t</textarea></div>")
        # Everything outside the <pre> tag is reformatted, but everything
        # inside is left alone.
-        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
+        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
    def test_prettify_handles_nested_string_literal_tags(self):
        # Most of this markup is inside a <pre> tag, so prettify()
        # only does three things to it:
        # 1. Add a newline and a space between the <div> and the <pre>
        # 2. Add a newline after the </pre>
        # 3. Add a newline at the end.
        #
        # The contents of the <pre> tag are left completely alone.  In
        # particular, we don't start adding whitespace again once we
        # encounter the first </pre> tag, because we know it's not
        # the one that put us into string literal mode.
        markup = """<div><pre><code>some
 <script><pre>code</pre></script> for you 
 </code></pre></div>"""
        expect = """<div>
 <pre><code>some
 <script><pre>code</pre></script> for you 
 </code></pre>
 </div>
 """
        soup = self.soup(markup)
        assert expect == soup.div.prettify()
    def test_prettify_accepts_formatter_function(self):
        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
@ -216,429 +249,6 @@ class TestFormatters(SoupTest):
        assert soup.contents[0].name == 'pre'
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
 class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.
    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.
    """
    HTML = """
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 "http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <head>
 <title>The title</title>
 <link rel="stylesheet" href="blah.css" type="text/css" id="l1">
 </head>
 <body>
 <custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
 <div id="main" class="fancy">
 <div id="inner">
 <h1 id="header1">An H1</h1>
 <p>Some text</p>
 <p class="onep" id="p1">Some more text</p>
 <h2 id="header2">An H2</h2>
 <p class="class1 class2 class3" id="pmulti">Another</p>
 <a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
 <h2 id="header3">Another H2</h2>
 <a id="me" href="http://simonwillison.net/" rel="me">me</a>
 <span class="s1">
 <a href="#" id="s1a1">span1a1</a>
 <a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
 <span class="span2">
 <a href="#" id="s2a1">span2a1</a>
 </span>
 <span class="span3"></span>
 <custom-dashed-tag class="dashed" id="dash2"/>
 <div data-tag="dashedvalue" id="data1"/>
 </span>
 </div>
 <x id="xid">
 <z id="zida"/>
 <z id="zidab"/>
 <z id="zidac"/>
 </x>
 <y id="yid">
 <z id="zidb"/>
 </y>
 <p lang="en" id="lang-en">English</p>
 <p lang="en-gb" id="lang-en-gb">English UK</p>
 <p lang="en-us" id="lang-en-us">English US</p>
 <p lang="fr" id="lang-fr">French</p>
 </div>
 <div id="footer">
 </div>
 """
    def setup_method(self):
        self.soup = BeautifulSoup(self.HTML, 'html.parser')
    def assert_selects(self, selector, expected_ids, **kwargs):
        """Assert that a selector selects exactly the tags with the given ids.

        :param selector: CSS selector passed to ``self.soup.select()``.
        :param expected_ids: The ``id`` values of the tags that should be
            selected, in any order. The argument is not modified.
        :param kwargs: Extra keyword arguments forwarded to ``select()``.
        :raises AssertionError: If the selected ids differ from the
            expected ones.
        """
        # Compare sorted copies. Order is irrelevant to the check, and
        # sorting in place would reorder a list the caller still owns.
        el_ids = sorted(el['id'] for el in self.soup.select(selector, **kwargs))
        expected = sorted(expected_ids)
        assert expected == el_ids, "Selector %s, expected [%s], got [%s]" % (
                selector, ', '.join(expected), ', '.join(el_ids)
        )
    assertSelect = assert_selects
    def assert_select_multiple(self, *tests):
        for selector, expected_ids in tests:
            self.assert_selects(selector, expected_ids)
    def test_one_tag_one(self):
        els = self.soup.select('title')
        assert len(els) == 1
        assert els[0].name == 'title'
        assert els[0].contents == ['The title']
    def test_one_tag_many(self):
        els = self.soup.select('div')
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'
        el = self.soup.select_one('div')
        assert 'main' == el['id']
    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        assert None == match
    def test_tag_in_tag_one(self):
        """The 'div div' selector selects the tags with ids inner and data1."""
        # assert_selects runs the selector itself, so no separate
        # select() call (whose result was never used) is needed.
        self.assert_selects('div div', ['inner', 'data1'])
    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
    def test_limit(self):
        self.assert_selects('html div', ['main'], limit=1)
        self.assert_selects('html body div', ['inner', 'main'], limit=2)
        self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
                           limit=10)
    def test_tag_no_match(self):
        assert len(self.soup.select('del')) == 0
    def test_invalid_tag(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('tag%t')
    def test_select_dashed_tag_ids(self):
        self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])
    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        assert dashed[0].name == 'custom-dashed-tag'
        assert dashed[0]['id'] == 'dash2'
    def test_dashed_tag_text(self):
        assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'
    def test_select_dashed_matches_find_all(self):
        assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')
    def test_header_tags(self):
        self.assert_select_multiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )
    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            assert len(els) == 1
            assert els[0].name == 'p'
            assert els[0]['class'] == ['onep']
    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        assert len(els) == 0
    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assert_selects(selector, ['inner'])
    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        assert len(els) == 0
    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        assert len(els) == 3
        for el in els:
            assert el.name == 'p'
        assert els[1]['class'] == ['onep']
        assert not els[0].has_attr('class')
    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            assert len(self.soup.select(selector)) == 0
    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
            '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assert_selects(selector, ['pmulti'])
    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assert_selects(selector, ['pmulti'])
    def test_child_selector(self):
        self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
        self.assert_selects('.s1 > a span', ['s1a2s1'])
    def test_child_selector_id(self):
        self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])
    def test_attribute_equals(self):
        self.assert_select_multiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )
    def test_attribute_tilde(self):
        self.assert_select_multiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )
    def test_attribute_startswith(self):
        self.assert_select_multiple(
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1'])
        )
    def test_attribute_endswith(self):
        self.assert_select_multiple(
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )
    def test_attribute_contains(self):
        self.assert_select_multiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
            # From test_attribute_endswith
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1'])
        )
    def test_attribute_exact_or_hypen(self):
        self.assert_select_multiple(
            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )
    def test_attribute_exists(self):
        self.assert_select_multiple(
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1'])
        )
    def test_quoted_space_in_selector_name(self):
        html = """<div style="display: wrong">nope</div>
        <div style="display: right">yes</div>
        """
        soup = BeautifulSoup(html, 'html.parser')
        [chosen] = soup.select('div[style="display: right"]')
        assert "yes" == chosen.string
    def test_unsupported_pseudoclass(self):
        with pytest.raises(NotImplementedError):
            self.soup.select("a:no-such-pseudoclass")
        with pytest.raises(SelectorSyntaxError):
            self.soup.select("a:nth-of-type(a)")
    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'
        # Try to select third paragraph
        els = self.soup.select('div#inner p:nth-of-type(3)')
        assert len(els) == 1
        assert els[0].string == 'Another'
        # Try to select (non-existent!) fourth paragraph
        els = self.soup.select('div#inner p:nth-of-type(4)')
        assert len(els) == 0
        # Zero will select no tags.
        els = self.soup.select('div p:nth-of-type(0)')
        assert len(els) == 0
    def test_nth_of_type_direct_descendant(self):
        els = self.soup.select('div#inner > p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'
    def test_id_child_selector_nth_of_type(self):
        self.assert_selects('#inner > p:nth-of-type(2)', ['p1'])
    def test_select_on_element(self):
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        inner = self.soup.find("div", id="main")
        selected = inner.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        self.assert_selects_ids(selected, ['inner', 'data1'])
    def test_overspecified_child_id(self):
        self.assert_selects(".fancy #inner", ['inner'])
        self.assert_selects(".normal #inner", [])
    def test_adjacent_sibling_selector(self):
        self.assert_selects('#p1 + h2', ['header2'])
        self.assert_selects('#p1 + h2 + p', ['pmulti'])
        self.assert_selects('#p1 + #header2 + .class1', ['pmulti'])
        assert [] == self.soup.select('#p1 + p')
    def test_general_sibling_selector(self):
        self.assert_selects('#p1 ~ h2', ['header2', 'header3'])
        self.assert_selects('#p1 ~ #header2', ['header2'])
        self.assert_selects('#p1 ~ h2 + a', ['me'])
        self.assert_selects('#p1 ~ h2 + [rel="me"]', ['me'])
        assert [] == self.soup.select('#inner ~ h2')
    def test_dangling_combinator(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('h1 >')
    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assert_selects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        self.assert_selects('x, y', ['xid', 'yid'])
    def test_multiple_select_with_no_space(self):
        self.assert_selects('x,y', ['xid', 'yid'])
    def test_multiple_select_with_more_space(self):
        self.assert_selects('x,    y', ['xid', 'yid'])
    def test_multiple_select_duplicated(self):
        self.assert_selects('x, x', ['xid'])
    def test_multiple_select_sibling(self):
        self.assert_selects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])
    def test_multiple_select_tag_and_direct_descendant(self):
        self.assert_selects('x, y > z', ['xid', 'zidb'])
    def test_multiple_select_direct_descendant_and_tags(self):
        self.assert_selects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
    def test_multiple_select_indirect_descendant(self):
        self.assert_selects('div x,y,  z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
    def test_invalid_multiple_select(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(',x, y')
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('x,,y')
    def test_multiple_select_attrs(self):
        self.assert_selects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
    def test_multiple_select_ids(self):
        self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])
    def test_multiple_select_nested(self):
        self.assert_selects('body > div > x, y > z', ['xid', 'zidb'])
    def test_select_duplicate_elements(self):
        # When markup contains duplicate elements, a multiple select
        # will find all of them.
        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
        soup = BeautifulSoup(markup, 'html.parser')
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)
        # Verify that find_all finds the same elements, though because
        # of an implementation detail it finds them in a different
        # order.
        for element in soup.find_all(class_=['c1', 'c2']):
            assert element in selected
 class TestPersistence(SoupTest):
    "Testing features like pickle and deepcopy."
@ -668,12 +278,24 @@ class TestPersistence(SoupTest):
        loaded = pickle.loads(dumped)
        assert loaded.__class__ == BeautifulSoup
        assert loaded.decode() == self.tree.decode()
-
+        
    def test_deepcopy_identity(self):
        # A deep copy of the tree must serialize to exactly the same
        # markup as the tree it was copied from.
        duplicate = copy.deepcopy(self.tree)
        duplicate_markup = duplicate.decode()
        original_markup = self.tree.decode()
        assert duplicate_markup == original_markup
    def test_copy_deeply_nested_document(self):
        # Nest <span> tags one level deeper than the interpreter's
        # recursion limit. A copy or deepcopy implemented with
        # recursive calls would overflow the stack on this document,
        # so simply finishing both copies is the whole check.
        depth = sys.getrecursionlimit() + 1
        soup = self.soup("<span>" * depth)
        for duplicate in (copy.copy, copy.deepcopy):
            copied = duplicate(soup)
    def test_copy_preserves_encoding(self):
        soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
        encoding = soup.original_encoding
--- a/lib/bs4/tests/test_soup.py
+++ b/lib/bs4/tests/test_soup.py
@ -24,6 +24,7 @@ from bs4.builder import (
 from bs4.element import (
    Comment,
    SoupStrainer,
    PYTHON_SPECIFIC_ENCODINGS,
    Tag,
    NavigableString,
 )
@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
        assert [] == soup.string_container_stack
 class TestOutput(SoupTest):
    @pytest.mark.parametrize(
        "eventual_encoding,actual_encoding",
        [("utf-8", "utf-8"), ("utf-16", "utf-16")],
    )
    def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
        # When decode() is called on an XML document with a regular
        # eventual_encoding, the emitted XML declaration names that
        # encoding -- the one intended for the later conversion to a
        # bytestring.
        soup = self.soup("<tag></tag>")
        soup.is_xml = True
        expected = f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
        decoded = soup.decode(eventual_encoding=eventual_encoding)
        assert expected == decoded
    @pytest.mark.parametrize(
        "eventual_encoding", list(PYTHON_SPECIFIC_ENCODINGS) + [None]
    )
    def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
        # If a Python-internal encoding is passed into decode(), or
        # eventual_encoding is None, the XML declaration must not
        # mention any particular encoding.
        #
        # The parameter list is every entry of PYTHON_SPECIFIC_ENCODINGS
        # plus None.
        soup = BeautifulSoup("<tag></tag>", "html.parser")
        soup.is_xml = True
        # Plain string literal: the expected text has no placeholders,
        # so no f-string is needed.
        assert ('<?xml version="1.0"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))
    def test(self):
        # BeautifulSoup is a Tag subclass that extends decode(). The
        # other Tag serialization methods, which call decode(), must
        # still produce the right output on a BeautifulSoup object.
        soup = self.soup("<tag></tag>")

        # Byte-oriented output.
        as_bytes = soup.encode(encoding="utf-8")
        assert b"<tag></tag>" == as_bytes
        contents_as_bytes = soup.encode_contents(encoding="utf-8")
        assert b"<tag></tag>" == contents_as_bytes

        # Text-oriented output.
        contents_as_text = soup.decode_contents()
        assert "<tag></tag>" == contents_as_text
        pretty = soup.prettify()
        assert "<tag>\n</tag>\n" == pretty
 class TestWarnings(SoupTest):
    # Note that some of the tests in this class create BeautifulSoup
    # objects directly rather than using self.soup(). That's
--- a/lib/soupsieve/init.py
+++ b/lib/soupsieve/init.py
@ -32,7 +32,7 @@ from . import css_match as cm
 from . import css_types as ct
 from .util import DEBUG, SelectorSyntaxError  # noqa: F401
 import bs4  # type: ignore[import]
-from typing import Optional, Any, Iterator, Iterable
+from typing import Any, Iterator, Iterable
 __all__ = (
    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,10 +45,10 @@ SoupSieve = cm.SoupSieve
 def compile(  # noqa: A001
    pattern: str,
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> cm.SoupSieve:
    """Compile CSS pattern."""
@ -79,10 +79,10 @@ def purge() -> None:
 def closest(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> 'bs4.Tag':
    """Match closest ancestor."""
@ -93,10 +93,10 @@ def closest(
 def match(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> bool:
    """Match node."""
@ -107,10 +107,10 @@ def match(
 def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> list['bs4.Tag']:
    """Filter list of nodes."""
@ -121,10 +121,10 @@ def filter(  # noqa: A001
 def select_one(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> 'bs4.Tag':
    """Select a single tag."""
@ -135,11 +135,11 @@ def select_one(
 def select(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> list['bs4.Tag']:
    """Select the specified tags."""
@ -150,11 +150,11 @@ def select(
 def iselect(
    select: str,
    tag: 'bs4.Tag',
-    namespaces: Optional[dict[str, str]] = None,
+    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
-    custom: Optional[dict[str, str]] = None,
+    custom: dict[str, str] | None = None,
    **kwargs: Any
 ) -> Iterator['bs4.Tag']:
    """Iterate the specified tags."""
--- a/lib/soupsieve/meta.py
+++ b/lib/soupsieve/meta.py
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
    return Version(major, minor, micro, release, pre, post, dev)
-__version_info__ = Version(2, 4, 0, "final")
+__version_info__ = Version(2, 4, 1, "final")
 __version__ = __version_info__._get_canonical()
--- a/lib/soupsieve/css_match.py
+++ b/lib/soupsieve/css_match.py
@ -6,7 +6,7 @@ import re
 from . import css_types as ct
 import unicodedata
 import bs4  # type: ignore[import]
-from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast  # noqa: F401
+from typing import Iterator, Iterable, Any, Callable, Sequence, cast  # noqa: F401
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
    def get_children(
        self,
        el: bs4.Tag,
-        start: Optional[int] = None,
+        start: int | None = None,
        reverse: bool = False,
        tags: bool = True,
        no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
        return parent
    @staticmethod
-    def get_tag_name(el: bs4.Tag) -> Optional[str]:
+    def get_tag_name(el: bs4.Tag) -> str | None:
        """Get tag."""
-        return cast(Optional[str], el.name)
+        return cast('str | None', el.name)
    @staticmethod
-    def get_prefix_name(el: bs4.Tag) -> Optional[str]:
+    def get_prefix_name(el: bs4.Tag) -> str | None:
        """Get prefix."""
-        return cast(Optional[str], el.prefix)
+        return cast('str | None', el.prefix)
    @staticmethod
-    def get_uri(el: bs4.Tag) -> Optional[str]:
+    def get_uri(el: bs4.Tag) -> str | None:
        """Get namespace `URI`."""
-        return cast(Optional[str], el.namespace)
+        return cast('str | None', el.namespace)
    @classmethod
    def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
        return bool(ns and ns == NS_XHTML)
    @staticmethod
-    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
+    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
        """Return namespace and attribute name without the prefix."""
        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
        cls,
        el: bs4.Tag,
        name: str,
-        default: Optional[str | Sequence[str]] = None
+        default: str | Sequence[str] | None = None
-    ) -> Optional[str | Sequence[str]]:
+    ) -> str | Sequence[str] | None:
        """Get attribute by name."""
        value = default
@ -348,7 +348,7 @@ class _DocumentNav:
        return value
    @classmethod
-    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
+    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
        """Iterate attributes."""
        for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
        return 0 <= minutes <= 59
    @classmethod
-    def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
+    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """Parse the input value."""
-        parsed = None  # type: Optional[tuple[float, ...]]
+        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return value
        if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
        self,
        selectors: ct.SelectorList,
        scope: bs4.Tag,
-        namespaces: Optional[ct.Namespaces],
+        namespaces: ct.Namespaces | None,
        flags: int
    ) -> None:
        """Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
        return self.get_tag_ns(el) == NS_XHTML
-    def get_tag(self, el: bs4.Tag) -> Optional[str]:
+    def get_tag(self, el: bs4.Tag) -> str | None:
        """Get tag."""
        name = self.get_tag_name(el)
        return util.lower(name) if name is not None and not self.is_xml else name
-    def get_prefix(self, el: bs4.Tag) -> Optional[str]:
+    def get_prefix(self, el: bs4.Tag) -> str | None:
        """Get prefix."""
        prefix = self.get_prefix_name(el)
        return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
-    def find_bidi(self, el: bs4.Tag) -> Optional[int]:
+    def find_bidi(self, el: bs4.Tag) -> int | None:
        """Get directionality from element text."""
        for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
        self,
        el: bs4.Tag,
        attr: str,
-        prefix: Optional[str]
+        prefix: str | None
-    ) -> Optional[str | Sequence[str]]:
+    ) -> str | Sequence[str] | None:
        """Match attribute name and return value if it exists."""
        value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
            name not in (self.get_tag(el), '*')
        )
-    def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
+    def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
        """Match the tag."""
        match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
        """Match element if it contains text."""
        match = True
-        content = None  # type: Optional[str | Sequence[str]]
+        content = None  # type: str | Sequence[str] | None
        for contain_list in contains:
            if content is None:
                if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
        match = False
        name = cast(str, self.get_attribute_by_name(el, 'name'))
-        def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
+        def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
            """Find this input's form."""
            form = None
            parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
                    if lim < 1:
                        break
-    def closest(self) -> Optional[bs4.Tag]:
+    def closest(self) -> bs4.Tag | None:
        """Match closest ancestor."""
        current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
    pattern: str
    selectors: ct.SelectorList
-    namespaces: Optional[ct.Namespaces]
+    namespaces: ct.Namespaces | None
    custom: dict[str, str]
    flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
        self,
        pattern: str,
        selectors: ct.SelectorList,
-        namespaces: Optional[ct.Namespaces],
+        namespaces: ct.Namespaces | None,
-        custom: Optional[ct.CustomSelectors],
+        custom: ct.CustomSelectors | None,
        flags: int
    ):
        """Initialize."""
--- a/lib/soupsieve/css_parser.py
+++ b/lib/soupsieve/css_parser.py
@ -7,7 +7,7 @@ from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
 import warnings
-from typing import Optional, Match, Any, Iterator, cast
+from typing import Match, Any, Iterator, cast
 UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
 '''.format(nl=NEWLINE, ident=IDENTIFIER)
 # Attribute value comparison. `!=` is handled special as it is non-standard.
 ATTR = r'''
-(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
+(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=VALUE)
 # Selector patterns
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(
    pattern: str,
-    namespaces: Optional[ct.Namespaces],
+    namespaces: ct.Namespaces | None,
-    custom: Optional[ct.CustomSelectors],
+    custom: ct.CustomSelectors | None,
    flags: int
 ) -> cm.SoupSieve:
    """Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
    _cached_css_compile.cache_clear()
-def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
+def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """Process custom."""
    custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
        return self.name
-    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match the selector."""
        return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern
-        self.matched_name = None  # type: Optional[SelectorPattern]
+        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
    def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
        return '' if self.matched_name is None else self.matched_name.get_name()
-    def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
+    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match the selector."""
        pseudo = None
@ -372,14 +372,14 @@ class _Selector:
    def __init__(self, **kwargs: Any) -> None:
        """Initialize."""
-        self.tag = kwargs.get('tag', None)  # type: Optional[ct.SelectorTag]
+        self.tag = kwargs.get('tag', None)  # type: ct.SelectorTag | None
        self.ids = kwargs.get('ids', [])  # type: list[str]
        self.classes = kwargs.get('classes', [])  # type: list[str]
        self.attributes = kwargs.get('attributes', [])  # type: list[ct.SelectorAttribute]
        self.nth = kwargs.get('nth', [])  # type: list[ct.SelectorNth]
        self.selectors = kwargs.get('selectors', [])  # type: list[ct.SelectorList]
        self.relations = kwargs.get('relations', [])  # type: list[_Selector]
-        self.rel_type = kwargs.get('rel_type', None)  # type: Optional[str]
+        self.rel_type = kwargs.get('rel_type', None)  # type: str | None
        self.contains = kwargs.get('contains', [])  # type: list[ct.SelectorContains]
        self.lang = kwargs.get('lang', [])  # type: list[ct.SelectorLang]
        self.flags = kwargs.get('flags', 0)  # type: int
@ -462,7 +462,7 @@ class CSSParser:
    def __init__(
        self,
        selector: str,
-        custom: Optional[dict[str, str | ct.SelectorList]] = None,
+        custom: dict[str, str | ct.SelectorList] | None = None,
        flags: int = 0
    ) -> None:
        """Initialize."""
--- a/lib/soupsieve/css_types.py
+++ b/lib/soupsieve/css_types.py
@ -2,7 +2,7 @@
 from __future__ import annotations
 import copyreg
 from .pretty import pretty
-from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
+from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
 __all__ = (
    'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )
-    tag: Optional[SelectorTag]
+    tag: SelectorTag | None
    ids: tuple[str, ...]
    classes: tuple[str, ...]
    attributes: tuple[SelectorAttribute, ...]
    nth: tuple[SelectorNth, ...]
    selectors: tuple[SelectorList, ...]
    relation: SelectorList
-    rel_type: Optional[str]
+    rel_type: str | None
    contains: tuple[SelectorContains, ...]
    lang: tuple[SelectorLang, ...]
    flags: int
    def __init__(
        self,
-        tag: Optional[SelectorTag],
+        tag: SelectorTag | None,
        ids: tuple[str, ...],
        classes: tuple[str, ...],
        attributes: tuple[SelectorAttribute, ...],
        nth: tuple[SelectorNth, ...],
        selectors: tuple[SelectorList, ...],
        relation: SelectorList,
-        rel_type: Optional[str],
+        rel_type: str | None,
        contains: tuple[SelectorContains, ...],
        lang: tuple[SelectorLang, ...],
        flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
    __slots__ = ("name", "prefix", "_hash")
    name: str
-    prefix: Optional[str]
+    prefix: str | None
-    def __init__(self, name: str, prefix: Optional[str]) -> None:
+    def __init__(self, name: str, prefix: str | None) -> None:
        """Initialize."""
        super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
    attribute: str
    prefix: str
-    pattern: Optional[Pattern[str]]
+    pattern: Pattern[str] | None
-    xml_type_pattern: Optional[Pattern[str]]
+    xml_type_pattern: Pattern[str] | None
    def __init__(
        self,
        attribute: str,
        prefix: str,
-        pattern: Optional[Pattern[str]],
+        pattern: Pattern[str] | None,
-        xml_type_pattern: Optional[Pattern[str]]
+        xml_type_pattern: Pattern[str] | None
    ) -> None:
        """Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
    def __init__(
        self,
-        selectors: Optional[Iterable[Selector | SelectorNull]] = None,
+        selectors: Iterable[Selector | SelectorNull] | None = None,
        is_not: bool = False,
        is_html: bool = False
    ) -> None:
--- a/lib/soupsieve/util.py
+++ b/lib/soupsieve/util.py
@ -3,7 +3,7 @@ from __future__ import annotations
 from functools import wraps, lru_cache
 import warnings
 import re
-from typing import Callable, Any, Optional
+from typing import Callable, Any
 DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
 class SelectorSyntaxError(Exception):
    """Syntax error in a CSS selector."""
-    def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
+    def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
        """Initialize."""
        self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
    col = 1
    text = []  # type: list[str]
    line = 1
-    offset = None  # type: Optional[int]
+    offset = None  # type: int | None
    # Split pattern by newline and handle the text before the newline
    for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
--- a/requirements.txt
+++ b/requirements.txt
@ -4,7 +4,7 @@ arrow==1.2.3
 backports.csv==1.0.7
 backports.functools-lru-cache==1.6.4
 backports.zoneinfo==0.2.1;python_version<"3.9"
-beautifulsoup4==4.11.2
+beautifulsoup4==4.12.2
 bleach==6.0.0
 certifi==2022.12.7
 cheroot==9.0.0