diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py index fcc27457..2a436d34 100644 --- a/lib/bs4/__init__.py +++ b/lib/bs4/__init__.py @@ -1,6 +1,5 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" +"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend". + http://www.crummy.com/software/BeautifulSoup/ Beautiful Soup uses a pluggable XML or HTML parser to parse a @@ -8,29 +7,34 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a provides methods and Pythonic idioms that make it easy to navigate, search, and modify the parse tree. -Beautiful Soup works with Python 2.7 and up. It works better if lxml +Beautiful Soup works with Python 3.5 and up. It works better if lxml and/or html5lib is installed. For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/bs4/doc/ - +documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.8.1" -__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson" +__version__ = "4.10.0" +__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson" # Use of this source code is governed by the MIT license. __license__ = "MIT" __all__ = ['BeautifulSoup'] + +from collections import Counter import os import re import sys import traceback import warnings +# The very first thing we do is give a useful error if someone is +# running this code under Python 2. +if sys.version_info.major < 3: + raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') + from .builder import builder_registry, ParserRejectedMarkup from .dammit import UnicodeDammit from .element import ( @@ -42,28 +46,49 @@ from .element import ( NavigableString, PageElement, ProcessingInstruction, + PYTHON_SPECIFIC_ENCODINGS, ResultSet, + Script, + Stylesheet, SoupStrainer, Tag, + TemplateString, ) -# The very first thing we do is give a useful error if someone is -# running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +# Define some custom warnings. +class GuessedAtParserWarning(UserWarning): + """The warning issued when BeautifulSoup has to guess what parser to + use -- probably because no parser was specified in the constructor. + """ + +class MarkupResemblesLocatorWarning(UserWarning): + """The warning issued when BeautifulSoup is given 'markup' that + actually looks like a resource locator -- a URL or a path to a file + on disk. + """ + class BeautifulSoup(Tag): - """ - This class defines the basic interface called by the tree builders. + """A data structure representing a parsed HTML or XML document. - These methods will be called by the parser: - reset() - feed(markup) + Most of the methods you'll call on a BeautifulSoup object are inherited from + PageElement or Tag. + + Internally, this class defines the basic interface called by the + tree builders when converting an HTML/XML document into a data + structure. The interface abstracts away the differences between + parsers. To write a new tree builder, you'll need to understand + these methods as a whole. 
+ + These methods will be called by the BeautifulSoup constructor: + * reset() + * feed(markup) The tree builder may call these methods from its feed() implementation: - handle_starttag(name, attrs) # See note about return value - handle_endtag(name) - handle_data(data) # Appends to the current data node - endData(containerClass) # Ends the current data node + * handle_starttag(name, attrs) # See note about return value + * handle_endtag(name) + * handle_data(data) # Appends to the current data node + * endData(containerClass) # Ends the current data node No matter how complicated the underlying parser is, you should be able to build a tree using 'start tag' events, 'end tag' events, @@ -73,68 +98,75 @@ class BeautifulSoup(Tag): like HTML's
tag), call handle_starttag and then handle_endtag. """ + + # Since BeautifulSoup subclasses Tag, it's possible to treat it as + # a Tag with a .name. This name makes it clear the BeautifulSoup + # object isn't a real markup tag. ROOT_TAG_NAME = '[document]' # If the end-user gives no indication which tree builder they # want, look for one with these features. DEFAULT_BUILDER_FEATURES = ['html', 'fast'] - + + # A string containing all ASCII whitespace characters, used in + # endData() to detect data chunks that seem 'empty'. ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" - + def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, element_classes=None, **kwargs): """Constructor. :param markup: A string or a file-like object representing - markup to be parsed. + markup to be parsed. - :param features: Desirable features of the parser to be used. This - may be the name of a specific parser ("lxml", "lxml-xml", - "html.parser", or "html5lib") or it may be the type of markup - to be used ("html", "html5", "xml"). It's recommended that you - name a specific parser, so that Beautiful Soup gives you the - same results across platforms and virtual environments. + :param features: Desirable features of the parser to be + used. This may be the name of a specific parser ("lxml", + "lxml-xml", "html.parser", or "html5lib") or it may be the + type of markup to be used ("html", "html5", "xml"). It's + recommended that you name a specific parser, so that + Beautiful Soup gives you the same results across platforms + and virtual environments. :param builder: A TreeBuilder subclass to instantiate (or - instance to use) instead of looking one up based on - `features`. You only need to use this if you've implemented a - custom TreeBuilder. + instance to use) instead of looking one up based on + `features`. You only need to use this if you've implemented a + custom TreeBuilder. :param parse_only: A SoupStrainer. Only parts of the document - matching the SoupStrainer will be considered. This is useful - when parsing part of a document that would otherwise be too - large to fit into memory. + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. :param from_encoding: A string indicating the encoding of the - document to be parsed. Pass this in if Beautiful Soup is - guessing wrongly about the document's encoding. + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. :param exclude_encodings: A list of strings indicating - encodings known to be wrong. Pass this in if you don't know - the document's encoding but you know Beautiful Soup's guess is - wrong. + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. 
:param element_classes: A dictionary mapping BeautifulSoup - classes like Tag and NavigableString to other classes you'd - like to be instantiated instead as the parse tree is - built. This is useful for using subclasses to modify the - default behavior of Tag or NavigableString. + classes like Tag and NavigableString, to other classes you'd + like to be instantiated instead as the parse tree is + built. This is useful for subclassing Tag or NavigableString + to modify default behavior. :param kwargs: For backwards compatibility purposes, the - constructor accepts certain keyword arguments used in - Beautiful Soup 3. None of these arguments do anything in - Beautiful Soup 4; they will result in a warning and then be ignored. - - Apart from this, any keyword arguments passed into the BeautifulSoup - constructor are propagated to the TreeBuilder constructor. This - makes it possible to configure a TreeBuilder beyond saying - which one to use. - + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4; they will result in a warning and then be + ignored. + + Apart from this, any keyword arguments passed into the + BeautifulSoup constructor are propagated to the TreeBuilder + constructor. This makes it possible to configure a + TreeBuilder by passing in arguments, not just by saying which + one to use. """ - if 'convertEntities' in kwargs: del kwargs['convertEntities'] warnings.warn( @@ -223,7 +255,9 @@ class BeautifulSoup(Tag): if not original_builder and not ( original_features == builder.NAME or original_features in builder.ALTERNATE_NAMES - ): + ) and markup: + # The user did not tell us which TreeBuilder to use, + # and we had to guess. Issue a warning. if builder.is_xml: markup_type = "XML" else: @@ -257,7 +291,10 @@ class BeautifulSoup(Tag): parser=builder.NAME, markup_type=markup_type ) - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2) + warnings.warn( + self.NO_PARSER_SPECIFIED_WARNING % values, + GuessedAtParserWarning, stacklevel=2 + ) else: if kwargs: warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") @@ -286,20 +323,32 @@ class BeautifulSoup(Tag): else: possible_filename = markup is_file = False + is_directory = False try: is_file = os.path.exists(possible_filename) + if is_file: + is_directory = os.path.isdir(possible_filename) except Exception as e: # This is almost certainly a problem involving # characters not valid in filenames on this # system. Just let it go. pass - if is_file: - if isinstance(markup, str): - markup = markup.encode("utf8") + if is_directory: + warnings.warn( + '"%s" looks like a directory name, not markup. You may' + ' want to open a file found in this directory and pass' + ' the filehandle into Beautiful Soup.' % ( + self._decode_markup(markup) + ), + MarkupResemblesLocatorWarning + ) + elif is_file: warnings.warn( '"%s" looks like a filename, not markup. You should' ' probably open this file and pass the filehandle into' - ' Beautiful Soup.' % markup) + ' Beautiful Soup.' 
% self._decode_markup(markup), + MarkupResemblesLocatorWarning + ) self._check_markup_is_url(markup) rejections = [] @@ -329,6 +378,7 @@ class BeautifulSoup(Tag): self.builder.soup = None def __copy__(self): + """Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" copy = type(self)( self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' ) @@ -347,11 +397,25 @@ class BeautifulSoup(Tag): d['builder'] = None return d - @staticmethod - def _check_markup_is_url(markup): - """ - Check if markup looks like it's actually a url and raise a warning - if so. Markup can be unicode or str (py2) / bytes (py3). + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is bytes so it's safe to send into warnings.warn. + + TODO: warnings.warn had this problem back in 2010 but it might not + anymore. + """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _check_markup_is_url(cls, markup): + """Error-handling method to raise a warning if incoming markup looks + like a URL. + + :param markup: A string. """ if isinstance(markup, bytes): space = b' ' @@ -364,18 +428,20 @@ class BeautifulSoup(Tag): if any(markup.startswith(prefix) for prefix in cant_start_with): if not space in markup: - if isinstance(markup, bytes): - decoded_markup = markup.decode('utf-8', 'replace') - else: - decoded_markup = markup warnings.warn( '"%s" looks like a URL. Beautiful Soup is not an' ' HTTP client. You should probably use an HTTP client like' ' requests to get the document behind the URL, and feed' - ' that document to Beautiful Soup.' % decoded_markup + ' that document to Beautiful Soup.' % cls._decode_markup( + markup + ), + MarkupResemblesLocatorWarning ) def _feed(self): + """Internal method that parses previously set markup, creating a large + number of Tag and NavigableString objects. + """ # Convert the document to Unicode. self.builder.reset() @@ -386,66 +452,110 @@ class BeautifulSoup(Tag): self.popTag() def reset(self): + """Reset this object to a state as though it had never parsed any + markup. + """ Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) self.hidden = 1 self.builder.reset() self.current_data = [] self.currentTag = None self.tagStack = [] + self.open_tag_counter = Counter() self.preserve_whitespace_tag_stack = [] + self.string_container_stack = [] self.pushTag(self) def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, sourceline=None, sourcepos=None, **kwattrs): - """Create a new tag associated with this soup.""" + """Create a new Tag associated with this BeautifulSoup object. + + :param name: The name of the new Tag. + :param namespace: The URI of the new Tag's XML namespace, if any. + :param prefix: The prefix for the new Tag's XML namespace, if any. + :param attrs: A dictionary of this Tag's attribute values; can + be used instead of `kwattrs` for attributes like 'class' + that are reserved words in Python. + :param sourceline: The line number where this tag was + (purportedly) found in its source document. + :param sourcepos: The character position within `sourceline` where this + tag was (purportedly) found. + :param kwattrs: Keyword arguments for the new Tag's attribute values. 
+ + """ kwattrs.update(attrs) return self.element_classes.get(Tag, Tag)( None, self.builder, name, namespace, nsprefix, kwattrs, sourceline=sourceline, sourcepos=sourcepos ) - def new_string(self, s, subclass=None): - """Create a new NavigableString associated with this soup.""" - subclass = subclass or self.element_classes.get( - NavigableString, NavigableString + def string_container(self, base_class=None): + container = base_class or NavigableString + + # There may be a general override of NavigableString. + container = self.element_classes.get( + container, container ) - return subclass(s) - def insert_before(self, successor): + # On top of that, we may be inside a tag that needs a special + # container class. + if self.string_container_stack and container is NavigableString: + container = self.builder.string_containers.get( + self.string_container_stack[-1].name, container + ) + return container + + def new_string(self, s, subclass=None): + """Create a new NavigableString associated with this BeautifulSoup + object. + """ + container = self.string_container(subclass) + return container(s) + + def insert_before(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_before().") - def insert_after(self, successor): + def insert_after(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): + """Internal method called by _popToTag when a tag is closed.""" tag = self.tagStack.pop() + if tag.name in self.open_tag_counter: + self.open_tag_counter[tag.name] -= 1 if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: self.preserve_whitespace_tag_stack.pop() - #print "Pop", tag.name + if self.string_container_stack and tag == self.string_container_stack[-1]: + self.string_container_stack.pop() + #print("Pop", tag.name) if self.tagStack: self.currentTag = self.tagStack[-1] return self.currentTag def pushTag(self, tag): - #print "Push", tag.name + """Internal method called by handle_starttag when a tag is opened.""" + #print("Push", tag.name) if self.currentTag is not None: self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] + if tag.name != self.ROOT_TAG_NAME: + self.open_tag_counter[tag.name] += 1 if tag.name in self.builder.preserve_whitespace_tags: self.preserve_whitespace_tag_stack.append(tag) + if tag.name in self.builder.string_containers: + self.string_container_stack.append(tag) def endData(self, containerClass=None): - - # Default container is NavigableString. - containerClass = containerClass or NavigableString - - # The user may want us to instantiate some alias for the - # container class. - containerClass = self.element_classes.get( - containerClass, containerClass - ) - + """Method called by the TreeBuilder when the end of a data segment + occurs. 
+        """
         if self.current_data:
             current_data = ''.join(self.current_data)
             # If whitespace is not preserved, and this string contains
@@ -472,11 +582,12 @@ class BeautifulSoup(Tag):
                         not self.parse_only.search(current_data)):
                 return

+        containerClass = self.string_container(containerClass)
         o = containerClass(current_data)
         self.object_was_parsed(o)

     def object_was_parsed(self, o, parent=None, most_recent_element=None):
-        """Add an object to the parse tree."""
+        """Method called by the TreeBuilder to integrate an object into the parse tree."""
         if parent is None:
             parent = self.currentTag
         if most_recent_element is not None:
@@ -545,10 +656,19 @@ class BeautifulSoup(Tag):

     def _popToTag(self, name, nsprefix=None, inclusivePop=True):
         """Pops the tag stack up to and including the most recent
-        instance of the given tag. If inclusivePop is false, pops the tag
-        stack up to but *not* including the most recent instqance of
-        the given tag."""
-        #print "Popping to %s" % name
+        instance of the given tag.
+
+        If there are no open tags with the given name, nothing will be
+        popped.
+
+        :param name: Pop up to the most recent tag with this name.
+        :param nsprefix: The namespace prefix that goes with `name`.
+        :param inclusivePop: If this is false, pops the tag stack up
+            to but *not* including the most recent instance of the
+            given tag.
+
+        """
+        #print("Popping to %s" % name)
         if name == self.ROOT_TAG_NAME:
             # The BeautifulSoup object itself can never be popped.
             return
@@ -557,6 +677,8 @@ class BeautifulSoup(Tag):
         stack_size = len(self.tagStack)
         for i in range(stack_size - 1, 0, -1):
+            if not self.open_tag_counter.get(name):
+                break
             t = self.tagStack[i]
             if (name == t.name and nsprefix == t.prefix):
                 if inclusivePop:
@@ -568,15 +690,22 @@ class BeautifulSoup(Tag):

     def handle_starttag(self, name, namespace, nsprefix, attrs,
                         sourceline=None, sourcepos=None):
-        """Push a start tag on to the stack.
+        """Called by the tree builder when a new tag is encountered.

-        If this method returns None, the tag was rejected by the
+        :param name: Name of the tag.
+        :param namespace: The URI of the tag's XML namespace, if any.
+        :param nsprefix: Namespace prefix for the tag.
+        :param attrs: A dictionary of attribute values.
+        :param sourceline: The line number where this tag was found in its
+            source document.
+        :param sourcepos: The character position within `sourceline` where this
+            tag was found.
+
+        If this method returns None, the tag was rejected by an active
         SoupStrainer. You should proceed as if the tag had not occurred
         in the document. For instance, if this was a self-closing tag,
         don't call handle_endtag.
         """
-
-        # print "Start tag %s: %s" % (name, attrs)
+        # print("Start tag %s: %s" % (name, attrs))
         self.endData()

         if (self.parse_only and len(self.tagStack) <= 1
@@ -598,22 +727,38 @@ class BeautifulSoup(Tag):
         return tag

     def handle_endtag(self, name, nsprefix=None):
-        #print "End tag: " + name
+        """Called by the tree builder when an ending tag is encountered.
+
+        :param name: Name of the tag.
+        :param nsprefix: Namespace prefix for the tag.
+        """
+        #print("End tag: " + name)
         self.endData()
         self._popToTag(name, nsprefix)

     def handle_data(self, data):
+        """Called by the tree builder when a chunk of textual data is encountered."""
         self.current_data.append(data)
-
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
                formatter="minimal"):
-        """Returns a string or Unicode representation of this document.
-        To get Unicode, pass None for encoding."""
+        """Returns a string or Unicode representation of the parse tree
+        as an HTML or XML document.
+        :param pretty_print: If this is True, indentation will be used to
+           make the document more readable.
+        :param eventual_encoding: The encoding of the final document.
+           If this is None, the document will be a Unicode string.
+        """
         if self.is_xml:
             # Print the XML declaration
             encoding_part = ''
+            if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
+                # This is a special Python encoding; it can't actually
+                # go into an XML document because it means nothing
+                # outside of Python.
+                eventual_encoding = None
             if eventual_encoding != None:
                 encoding_part = ' encoding="%s"' % eventual_encoding
             prefix = '<?xml version="1.0"%s?>\n' % encoding_part
@@ -626,7 +771,7 @@ class BeautifulSoup(Tag):
         return prefix + super(BeautifulSoup, self).decode(
             indent_level, eventual_encoding, formatter)

-# Alias to make it easier to type import: 'from bs4 import _soup'
+# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
_soup = BeautifulSoup

@@ -642,14 +787,18 @@ class BeautifulStoneSoup(BeautifulSoup):

 class StopParsing(Exception):
+    """Exception raised by a TreeBuilder if it's unable to continue parsing."""
     pass

 class FeatureNotFound(ValueError):
+    """Exception raised by the BeautifulSoup constructor if no parser with the
+    requested features is found.
+    """
     pass

-#By default, act as an HTML pretty-printer.
+#If this file is run as a script, act as an HTML pretty-printer.
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
-    print(soup.prettify())
+    print((soup.prettify()))
diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py
index 03a4c1e0..bd44905e 100644
--- a/lib/bs4/builder/__init__.py
+++ b/lib/bs4/builder/__init__.py
@@ -7,8 +7,11 @@ import sys
 from bs4.element import (
     CharsetMetaAttributeValue,
     ContentMetaAttributeValue,
+    Stylesheet,
+    Script,
+    TemplateString,
     nonwhitespace_re
-    )
+)

 __all__ = [
     'HTMLTreeBuilder',
@@ -27,18 +30,33 @@ HTML_5 = 'html5'

 class TreeBuilderRegistry(object):
-
+    """A way of looking up TreeBuilder subclasses by their name or by desired
+    features.
+    """
+
     def __init__(self):
         self.builders_for_feature = defaultdict(list)
         self.builders = []

     def register(self, treebuilder_class):
-        """Register a treebuilder based on its advertised features."""
+        """Register a treebuilder based on its advertised features.
+
+        :param treebuilder_class: A subclass of TreeBuilder. Its .features
+           attribute should list its features.
+        """
         for feature in treebuilder_class.features:
             self.builders_for_feature[feature].insert(0, treebuilder_class)
         self.builders.insert(0, treebuilder_class)

     def lookup(self, *features):
+        """Look up a TreeBuilder subclass with the desired features.
+
+        :param features: A list of features to look for. If none are
+            provided, the most recently registered TreeBuilder subclass
+            will be used.
+        :return: A TreeBuilder subclass, or None if there's no
+            registered subclass with all the requested features.
+        """
         if len(self.builders) == 0:
             # There are no builders at all.
             return None
@@ -81,7 +99,7 @@ class TreeBuilderRegistry(object):
 builder_registry = TreeBuilderRegistry()

 class TreeBuilder(object):
-    """Turn a document into a Beautiful Soup object tree."""
+    """Turn a textual document into a Beautiful Soup object tree."""

     NAME = "[Unknown tree builder]"
     ALTERNATE_NAMES = []
@@ -96,7 +114,12 @@ class TreeBuilder(object):
     # comma-separated list of CDATA, rather than a single CDATA.
     DEFAULT_CDATA_LIST_ATTRIBUTES = {}

+    # Whitespace should be preserved inside these tags.
DEFAULT_PRESERVE_WHITESPACE_TAGS = set() + + # The textual contents of tags with these names should be + # instantiated with some class other than NavigableString. + DEFAULT_STRING_CONTAINERS = {} USE_DEFAULT = object() @@ -105,30 +128,39 @@ class TreeBuilder(object): def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT, - store_line_numbers=USE_DEFAULT): + store_line_numbers=USE_DEFAULT, + string_containers=USE_DEFAULT, + ): """Constructor. :param multi_valued_attributes: If this is set to None, the - TreeBuilder will not turn any values for attributes like - 'class' into lists. Setting this do a dictionary will - customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES - for an example. + TreeBuilder will not turn any values for attributes like + 'class' into lists. Setting this to a dictionary will + customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES + for an example. - Internally, these are called "CDATA list attributes", but that - probably doesn't make sense to an end-user, so the argument name - is `multi_valued_attributes`. + Internally, these are called "CDATA list attributes", but that + probably doesn't make sense to an end-user, so the argument name + is `multi_valued_attributes`. :param preserve_whitespace_tags: A list of tags to treat - the way
<pre> tags are treated in HTML. Tags in this list
-        will have 
+         the way <pre> tags are treated in HTML. Tags in this list
+         are immune from pretty-printing; their contents will always be
+         output as-is.
+
+        :param string_containers: A dictionary mapping tag names to
+        the classes that should be instantiated to contain the textual
+        contents of those tags. The default is to use NavigableString
+        for every tag, no matter what the name. You can override the
+        default by changing DEFAULT_STRING_CONTAINERS.
 
         :param store_line_numbers: If the parser keeps track of the
-        line numbers and positions of the original markup, that
-        information will, by default, be stored in each corresponding
-        `Tag` object. You can turn this off by passing
-        store_line_numbers=False. If the parser you're using doesn't 
-        keep track of this information, then setting store_line_numbers=True
-        will do nothing.
+         line numbers and positions of the original markup, that
+         information will, by default, be stored in each corresponding
+         `Tag` object. You can turn this off by passing
+         store_line_numbers=False. If the parser you're using doesn't 
+         keep track of this information, then setting store_line_numbers=True
+         will do nothing.
         """
         self.soup = None
         if multi_valued_attributes is self.USE_DEFAULT:
@@ -139,15 +171,25 @@ class TreeBuilder(object):
         self.preserve_whitespace_tags = preserve_whitespace_tags
         if store_line_numbers == self.USE_DEFAULT:
             store_line_numbers = self.TRACKS_LINE_NUMBERS
-        self.store_line_numbers = store_line_numbers
+        self.store_line_numbers = store_line_numbers 
+        if string_containers == self.USE_DEFAULT:
+            string_containers = self.DEFAULT_STRING_CONTAINERS
+        self.string_containers = string_containers
         
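A quick illustration of how these options are reached in practice: most users never instantiate a TreeBuilder directly, because the BeautifulSoup constructor forwards unrecognized keyword arguments to it (see the constructor docstring earlier in this patch). This is a sketch, not part of the patch; the `<listing>` tag and markup strings are made up for the example, and it assumes the html.parser builder, which accepts these arguments:

```python
from bs4 import BeautifulSoup

# multi_valued_attributes=None: 'class' stays a single string instead
# of being split into a list of CDATA values.
soup = BeautifulSoup('<a class="foo bar"></a>', 'html.parser',
                     multi_valued_attributes=None)
assert soup.a['class'] == 'foo bar'

# preserve_whitespace_tags: treat <listing> the way <pre> is treated,
# i.e. exempt its contents from pretty-printing.
soup = BeautifulSoup('<listing>  keep  this  </listing>', 'html.parser',
                     preserve_whitespace_tags={'pre', 'textarea', 'listing'})
```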
     def initialize_soup(self, soup):
         """The BeautifulSoup object has been initialized and is now
         being associated with the TreeBuilder.
+
+        :param soup: A BeautifulSoup object.
         """
         self.soup = soup
         
     def reset(self):
+        """Do any work necessary to reset the underlying parser
+        for a new document.
+
+        By default, this does nothing.
+        """
         pass
 
     def can_be_empty_element(self, tag_name):
@@ -159,23 +201,57 @@ class TreeBuilder(object):
         For instance: an HTMLBuilder does not consider a <p> tag to be
         an empty-element tag (it's not in
         HTMLBuilder.empty_element_tags). This means an empty <p> tag
-        will be presented as "<p></p>", not "<p/>".
+        will be presented as "<p></p>", not "<p/>" or "<p>".

         The default implementation has no opinion about which tags are
         empty-element tags, so a tag will be presented as an
-        empty-element tag if and only if it has no contents.
-        "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
+        empty-element tag if and only if it has no children.
+        "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
         be left alone.
+
+        :param tag_name: The name of a markup tag.
         """
         if self.empty_element_tags is None:
             return True
         return tag_name in self.empty_element_tags

     def feed(self, markup):
+        """Run some incoming markup through some parsing process,
+        populating the `BeautifulSoup` object in self.soup.
+
+        This method is not implemented in TreeBuilder; it must be
+        implemented in subclasses.
+
+        :return: None.
+        """
         raise NotImplementedError()

     def prepare_markup(self, markup, user_specified_encoding=None,
                        document_declared_encoding=None, exclude_encodings=None):
+        """Run any preliminary steps necessary to make incoming markup
+        acceptable to the parser.
+
+        :param markup: Some markup -- probably a bytestring.
+        :param user_specified_encoding: The user asked to try this encoding.
+        :param document_declared_encoding: The markup itself claims to be
+            in this encoding. NOTE: This argument is not used by the
+            calling code and can probably be removed.
+        :param exclude_encodings: The user asked _not_ to try any of
+            these encodings.
+
+        :yield: A series of 4-tuples:
+            (markup, encoding, declared encoding,
+             has undergone character replacement)
+
+        Each 4-tuple represents a strategy for converting the
+        document to Unicode and parsing it. Each strategy will be tried
+        in turn.
+
+        By default, the only strategy is to parse the markup
+        as-is. See `LXMLTreeBuilderForXML` and
+        `HTMLParserTreeBuilder` for implementations that take into
+        account the quirks of particular parsers.
+        """
         yield markup, None, None, False

     def test_fragment_to_document(self, fragment):
@@ -188,16 +264,36 @@ class TreeBuilder(object):
         results against other HTML fragments.

         This method should not be used outside of tests.
+
+        :param fragment: A string -- fragment of HTML.
+        :return: A string -- a full HTML document.
         """
         return fragment

     def set_up_substitutions(self, tag):
+        """Set up any substitutions that will need to be performed on
+        a `Tag` when it's output as a string.
+
+        By default, this does nothing. See `HTMLTreeBuilder` for a
+        case where this is used.
+
+        :param tag: A `Tag`
+        :return: Whether or not a substitution was performed.
+        """
         return False

     def _replace_cdata_list_attribute_values(self, tag_name, attrs):
-        """Replaces class="foo bar" with class=["foo", "bar"]
+        """When an attribute value is associated with a tag that can
+        have multiple values for that attribute, convert the string
+        value to a list of strings.

-        Modifies its input in place.
+        Basically, replaces class="foo bar" with class=["foo", "bar"]
+
+        NOTE: This method modifies its input in place.
+
+        :param tag_name: The name of a tag.
+        :param attrs: A dictionary containing the tag's attributes.
+            Any appropriate attribute values will be modified in place.
         """
         if not attrs:
             return attrs
@@ -225,7 +321,11 @@ class TreeBuilder(object):
         return attrs

 class SAXTreeBuilder(TreeBuilder):
-    """A Beautiful Soup treebuilder that listens for SAX events."""
+    """A Beautiful Soup treebuilder that listens for SAX events.
+
+    This is not currently used for anything, but it demonstrates
+    how a simple TreeBuilder would work.
+ """ def feed(self, markup): raise NotImplementedError() @@ -235,11 +335,11 @@ class SAXTreeBuilder(TreeBuilder): def startElement(self, name, attrs): attrs = dict((key[1], value) for key, value in list(attrs.items())) - #print "Start %s, %r" % (name, attrs) + #print("Start %s, %r" % (name, attrs)) self.soup.handle_starttag(name, attrs) def endElement(self, name): - #print "End %s" % name + #print("End %s" % name) self.soup.handle_endtag(name) def startElementNS(self, nsTuple, nodeName, attrs): @@ -289,6 +389,22 @@ class HTMLTreeBuilder(TreeBuilder): # but it may do so eventually, and this information is available if # you need to use it. block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) + + # The HTML standard defines an unusual content model for these tags. + # We represent this by using a string class other than NavigableString + # inside these tags. + # + # I made this list by going through the HTML spec + # (https://html.spec.whatwg.org/#metadata-content) and looking for + # "metadata content" elements that can contain strings. + # + # TODO: Arguably
<noscript> could go here but it seems
+    # qualitatively different from the other tags.
+    DEFAULT_STRING_CONTAINERS = {
+        'style': Stylesheet,
+        'script': Script,
+        'template': TemplateString,
+    }

[... remainder of the lib/bs4/builder/__init__.py diff and the start of the lib/bs4/element.py diff lost in extraction ...]

+        """Should this tag be pretty-printed?
+
+        Most of them should, but some (such as <pre> in HTML
+        documents) should not.
+        """
         return (
             indent_level is not None
             and (
@@ -1196,6 +1729,15 @@ class Tag(PageElement):
         )
 
     def prettify(self, encoding=None, formatter="minimal"):
+        """Pretty-print this PageElement as a string.
+
+        :param encoding: The eventual encoding of the string. If this is None,
+            a Unicode string will be returned.
+        :param formatter: A Formatter object, or a string naming one of
+            the standard formatters.
+        :return: A Unicode string (if encoding==None) or a bytestring 
+            (otherwise).
+        """
         if encoding is None:
             return self.decode(True, formatter=formatter)
         else:
@@ -1207,7 +1749,8 @@ class Tag(PageElement):
         """Renders the contents of this tag as a Unicode string.
 
         :param indent_level: Each line of the rendering will be
-           indented this many spaces.
+           indented this many spaces. Used internally in
+           recursive calls while pretty-printing.
 
         :param eventual_encoding: The tag is destined to be
            encoded into this encoding. decode_contents() is _not_
@@ -1249,23 +1792,26 @@ class Tag(PageElement):
     def encode_contents(
         self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
         formatter="minimal"):
-        """Renders the contents of this tag as a bytestring.
+        """Renders the contents of this PageElement as a bytestring.
 
         :param indent_level: Each line of the rendering will be
-           indented this many spaces.
+           indented this many spaces. Used internally in
+           recursive calls while pretty-printing.
 
         :param eventual_encoding: The bytestring will be in this encoding.
 
-        :param formatter: The output formatter responsible for converting
-           entities to Unicode characters.
-        """
+        :param formatter: A Formatter object, or a string naming one of
+            the standard Formatters.
 
+        :return: A bytestring.
+        """
         contents = self.decode_contents(indent_level, encoding, formatter)
         return contents.encode(encoding)
 
     # Old method for BS3 compatibility
     def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
                        prettyPrint=False, indentLevel=0):
+        """Deprecated method for BS3 compatibility."""
         if not prettyPrint:
             indentLevel = None
         return self.encode_contents(
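For orientation, this is roughly how the output methods documented in this hunk relate to one another. An illustrative sketch, not part of the patch; the markup string is made up:

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>Hello</p>', 'html.parser')

assert soup.decode() == '<p>Hello</p>'          # Unicode string
assert soup.encode('utf-8') == b'<p>Hello</p>'  # bytestring
print(soup.prettify())                          # indented Unicode string

# The *_contents variants render only what is inside the tag,
# without the tag itself.
assert soup.p.decode_contents() == 'Hello'
assert soup.p.encode_contents() == b'Hello'
```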
@@ -1275,27 +1821,47 @@ class Tag(PageElement):
 
     def find(self, name=None, attrs={}, recursive=True, text=None,
              **kwargs):
-        """Return only the first child of this Tag matching the given
-        criteria."""
+        """Look in the children of this PageElement and find the first
+        PageElement that matches the given criteria.
+
+        All find_* methods take a common set of arguments. See the online
+        documentation for detailed explanations.
+
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param recursive: If this is True, find() will perform a
+            recursive search of this PageElement's children. Otherwise,
+            only the direct children will be considered.
+        :kwargs: A dictionary of filters on attribute values.
+        :return: The first matching PageElement, or None if there is no match.
+        :rtype: bs4.element.Tag | bs4.element.NavigableString
+        """
         r = None
         l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
         if l:
             r = l[0]
         return r
-    findChild = find
+    findChild = find #BS2
 
     def find_all(self, name=None, attrs={}, recursive=True, text=None,
                  limit=None, **kwargs):
-        """Extracts a list of Tag objects that match the given
-        criteria.  You can specify the name of the Tag and any
-        attributes you want the Tag to have.
+        """Look in the children of this PageElement and find all
+        PageElements that match the given criteria.
 
-        The value of a key-value pair in the 'attrs' map can be a
-        string, a list of strings, a regular expression object, or a
-        callable that takes a string and returns whether or not the
-        string matches for some custom definition of 'matches'. The
-        same is true of the tag name."""
+        All find_* methods take a common set of arguments. See the online
+        documentation for detailed explanations.
 
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param recursive: If this is True, find_all() will perform a
+            recursive search of this PageElement's children. Otherwise,
+            only the direct children will be considered.
+        :param limit: Stop looking after finding this many results.
+        :kwargs: A dictionary of filters on attribute values.
+        :return: A ResultSet of PageElements.
+        :rtype: bs4.element.ResultSet
+        """
         generator = self.descendants
         if not recursive:
             generator = self.children
@@ -1306,11 +1872,20 @@ class Tag(PageElement):
     #Generator methods
     @property
     def children(self):
+        """Iterate over all direct children of this PageElement.
+
+        :yield: A sequence of PageElements.
+        """
         # return iter() to make the purpose of the method clear
         return iter(self.contents)  # XXX This seems to be untested.
 
     @property
     def descendants(self):
+        """Iterate over all children of this PageElement in a
+        depth-first sequence (i.e. all descendants, in document order).
+
+        :yield: A sequence of PageElements.
+        """
         if not len(self.contents):
             return
         stopNode = self._last_descendant().next_element
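The practical difference between the two generators, per the corrected docstring above: `children` yields only direct children, while `descendants` walks the whole subtree in document order. A sketch (markup invented for the example):

```python
from bs4 import BeautifulSoup
from bs4.element import Tag

soup = BeautifulSoup('<div><p>one <b>two</b></p></div>', 'html.parser')

assert [child.name for child in soup.div.children] == ['p']

walk = [n.name if isinstance(n, Tag) else str(n)
        for n in soup.div.descendants]
assert walk == ['p', 'one ', 'b', 'two']   # document order
```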
@@ -1321,7 +1896,21 @@ class Tag(PageElement):
 
     # CSS selector code
     def select_one(self, selector, namespaces=None, **kwargs):
-        """Perform a CSS selection operation on the current element."""
+        """Perform a CSS selection operation on the current element.
+
+        :param selector: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+           soupsieve.select() method.
+
+        :return: The first matching Tag, or None if nothing matches.
+        :rtype: bs4.element.Tag
+        """
         value = self.select(selector, namespaces, 1, **kwargs)
         if value:
             return value[0]
@@ -1335,14 +1924,17 @@ class Tag(PageElement):
         :param selector: A string containing a CSS selector.
 
         :param namespaces: A dictionary mapping namespace prefixes
-        used in the CSS selector to namespace URIs. By default,
-        Beautiful Soup will use the prefixes it encountered while
-        parsing the document.
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
 
         :param limit: After finding this number of results, stop looking.
 
-        :param kwargs: Any extra arguments you'd like to pass in to
-        soupsieve.select().
+        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+           soupsieve.select() method.
+
+        :return: A ResultSet of Tags.
+        :rtype: bs4.element.ResultSet
         """
         if namespaces is None:
             namespaces = self._namespaces
@@ -1354,19 +1946,27 @@ class Tag(PageElement):
                 "Cannot execute CSS selectors because the soupsieve package is not installed."
             )
             
-        return soupsieve.select(selector, self, namespaces, limit, **kwargs)
+        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
+
+        # We do this because it's more consistent and because
+        # ResultSet.__getattr__ has a helpful error message.
+        return ResultSet(None, results)
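Callers see no API change from this wrapping; select() results simply gain ResultSet's behavior. For example (requires the soupsieve package, as checked above; markup invented):

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup(
    '<ul><li class="a">1</li><li class="a">2</li></ul>', 'html.parser'
)
assert soup.select_one('li.a').get_text() == '1'
assert [li.get_text() for li in soup.select('li.a')] == ['1', '2']
```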
 
     # Old names for backwards compatibility
     def childGenerator(self):
+        """Deprecated generator."""
         return self.children
 
     def recursiveChildGenerator(self):
+        """Deprecated generator."""
         return self.descendants
 
     def has_key(self, key):
-        """This was kind of misleading because has_key() (attributes)
-        was different from __in__ (contents). has_key() is gone in
-        Python 3, anyway."""
+        """Deprecated method. This was kind of misleading because has_key()
+        (attributes) was different from __contains__ (contents).
+
+        has_key() is gone in Python 3, anyway.
+        """
         warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % (
                 key))
         return self.has_attr(key)
@@ -1374,9 +1974,26 @@ class Tag(PageElement):
 # Next, a couple classes to represent queries and their results.
 class SoupStrainer(object):
     """Encapsulates a number of ways of matching a markup element (tag or
-    text)."""
+    string).
+
+    This is primarily used to underpin the find_* methods, but you can
+    create one yourself and pass it in as `parse_only` to the
+    `BeautifulSoup` constructor, to parse a subset of a large
+    document.
+    """
 
     def __init__(self, name=None, attrs={}, text=None, **kwargs):
+        """Constructor.
+
+        The SoupStrainer constructor takes the same arguments passed
+        into the find_* methods. See the online documentation for
+        detailed explanations.
+
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param text: A filter for a NavigableString with specific text.
+        :kwargs: A dictionary of filters on attribute values.
+        """        
         self.name = self._normalize_search_value(name)
         if not isinstance(attrs, dict):
             # Treat a non-dict value for attrs as a search for the 'class'
@@ -1434,17 +2051,38 @@ class SoupStrainer(object):
         return str(str(value))
 
     def __str__(self):
+        """A human-readable representation of this SoupStrainer."""
         if self.text:
             return self.text
         else:
             return "%s|%s" % (self.name, self.attrs)
 
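A typical use of the `parse_only` pattern mentioned in the class docstring — only matching elements are kept while parsing. A sketch (markup invented; html.parser supports `parse_only`, html5lib does not):

```python
from bs4 import BeautifulSoup, SoupStrainer

only_links = SoupStrainer('a', href=True)
soup = BeautifulSoup(
    '<html><body><a href="/x">x</a><p>skipped</p></body></html>',
    'html.parser', parse_only=only_links
)
assert [a['href'] for a in soup.find_all('a')] == ['/x']
assert soup.find('p') is None   # never entered the tree
```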
     def search_tag(self, markup_name=None, markup_attrs={}):
+        """Check whether a Tag with the given name and attributes would
+        match this SoupStrainer.
+
+        Used prospectively to decide whether to even bother creating a Tag
+        object.
+
+        :param markup_name: A tag name as found in some markup.
+        :param markup_attrs: A dictionary of attributes as found in some markup.
+
+        :return: True if the prospective tag would match this SoupStrainer;
+            False otherwise.
+        """
         found = None
         markup = None
         if isinstance(markup_name, Tag):
             markup = markup_name
             markup_attrs = markup
+
+        if isinstance(self.name, str):
+            # Optimization for a very common case where the user is
+            # searching for a tag with one specific name, and we're
+            # looking at a tag with a different name.
+            if markup and not markup.prefix and self.name != markup.name:
+                return False
+            
         call_function_with_tag_data = (
             isinstance(self.name, Callable)
             and not isinstance(markup_name, Tag))
@@ -1478,10 +2116,19 @@ class SoupStrainer(object):
         if found and self.text and not self._matches(found.string, self.text):
             found = None
         return found
+
+    # For BS3 compatibility.
     searchTag = search_tag
 
     def search(self, markup):
-        # print 'looking for %s in %s' % (self, markup)
+        """Find all items in `markup` that match this SoupStrainer.
+
+        Used by the core _find_all() method, which is ultimately
+        called by all find_* methods.
+
+        :param markup: A PageElement or a list of them.
+        """
+        # print('looking for %s in %s' % (self, markup))
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
@@ -1507,7 +2154,7 @@ class SoupStrainer(object):
         return found
 
     def _matches(self, markup, match_against, already_tried=None):
-        # print u"Matching %s against %s" % (markup, match_against)
+        # print(u"Matching %s against %s" % (markup, match_against))
         result = False
         if isinstance(markup, list) or isinstance(markup, tuple):
             # This should only happen when searching a multi-valued attribute
@@ -1593,10 +2240,16 @@ class ResultSet(list):
     """A ResultSet is just a list that keeps track of the SoupStrainer
     that created it."""
     def __init__(self, source, result=()):
+        """Constructor.
+
+        :param source: A SoupStrainer.
+        :param result: A list of PageElements.
+        """
         super(ResultSet, self).__init__(result)
         self.source = source
 
     def __getattr__(self, key):
+        """Raise a helpful exception to explain a common code fix."""
         raise AttributeError(
-            "ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
+            "ResultSet object has no attribute '%s'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?" % key
         )
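The reworded message targets the most common misuse — treating the list returned by find_all() as if it were a single element. A short demonstration (markup invented):

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<a href="/x">x</a>', 'html.parser')
links = soup.find_all('a')       # a ResultSet, i.e. a list

try:
    links.href                   # wrong: attribute access on the list
except AttributeError as e:
    print(e)                     # suggests find() instead of find_all()

assert soup.find('a')['href'] == '/x'
```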
diff --git a/lib/bs4/formatter.py b/lib/bs4/formatter.py
index 7dbaa385..3bd9f859 100644
--- a/lib/bs4/formatter.py
+++ b/lib/bs4/formatter.py
@@ -5,6 +5,28 @@ class Formatter(EntitySubstitution):
 
     Some parts of this strategy come from the distinction between
     HTML4, HTML5, and XML. Others are configurable by the user.
+
+    Formatters are passed in as the `formatter` argument to methods
+    like `PageElement.encode`. Most people won't need to think about
+    formatters, and most people who need to think about them can pass
+    in one of these predefined strings as `formatter` rather than
+    making a new Formatter object:
+
+    For HTML documents:
+     * 'html' - HTML entity substitution for generic HTML documents. (default)
+     * 'html5' - HTML entity substitution for HTML5 documents, as
+                 well as some optimizations in the way tags are rendered.
+     * 'minimal' - Only make the substitutions necessary to guarantee
+                   valid HTML.
+     * None - Do not perform any substitution. This will be faster
+              but may result in invalid markup.
+
+    For XML documents:
+     * 'html' - Entity substitution for XHTML documents.
+     * 'minimal' - Only make the substitutions necessary to guarantee
+                   valid XML. (default)
+     * None - Do not perform any substitution. This will be faster
+              but may result in invalid markup.
     """
     # Registries of XML and HTML formatters.
     XML_FORMATTERS = {}
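In practice most callers pass one of the strings above straight to an output method; a Formatter object is only needed for custom behavior. A sketch of both styles — the custom-object call assumes the HTMLFormatter subclass defined later in this file, and no entity_substitution function is passed, so none is performed:

```python
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter

soup = BeautifulSoup('<br/>R&amp;D', 'html.parser')

print(soup.decode(formatter='minimal'))  # <br/>R&amp;D
print(soup.decode(formatter=None))       # <br/>R&D  -- no substitution

# Custom Formatter: render void elements HTML-style, as <br>.
no_slash = HTMLFormatter(void_element_close_prefix='')
print(soup.decode(formatter=no_slash))   # <br>R&D
```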
@@ -27,11 +49,26 @@ class Formatter(EntitySubstitution):
     def __init__(
             self, language=None, entity_substitution=None,
             void_element_close_prefix='/', cdata_containing_tags=None,
+            empty_attributes_are_booleans=False,
     ):
-        """
+        """Constructor.
 
-        :param void_element_close_prefix: By default, represent void
-        elements as <br/> rather than <br>
+        :param language: This should be Formatter.XML if you are formatting
+           XML markup and Formatter.HTML if you are formatting HTML markup.
+
+        :param entity_substitution: A function to call to replace special
+           characters with XML/HTML entities. For examples, see 
+           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
+        :param void_element_close_prefix: By default, void elements
+           are represented as <br/> (XML rules) rather than <br>
+           (HTML rules). To get <br>, pass in the empty string.
+        :param cdata_containing_tags: The list of tags that are defined
+           as containing CDATA in this dialect. For example, in HTML,
+           "<script>" and "<style>" tags are defined as containing
+           CDATA, and their contents should not be formatted.

[... remainder of the lib/bs4/formatter.py diff and the start of the lib/bs4/testing.py diff lost in extraction ...]

+    def test_special_string_containers(self):
+        soup = self.soup(
+            "<style>Some CSS</style><script>Some Javascript</script>"
+        )
+        assert isinstance(soup.style.string, Stylesheet)
+        assert isinstance(soup.script.string, Script)
+
+        soup = self.soup(
+            "<style><!--Some CSS--></style>"
+        )
+        assert isinstance(soup.style.string, Stylesheet)
+        # The contents of the style tag resemble an HTML comment, but
+        # it's not treated as a comment.
+        self.assertEqual("<!--Some CSS-->", soup.style.string)
+        assert isinstance(soup.style.string, Stylesheet)
+        
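This test exercises the user-visible effect of the string_containers machinery added in builder/__init__.py: strings inside `<style>` and `<script>` come back as Stylesheet and Script objects. Both subclass NavigableString, so existing string-handling code keeps working. A usage sketch (markup invented, html.parser builder assumed):

```python
from bs4 import BeautifulSoup
from bs4.element import Script, Stylesheet

soup = BeautifulSoup(
    '<style>p { margin: 0 }</style><script>alert(1)</script>',
    'html.parser'
)
assert isinstance(soup.style.string, Stylesheet)
assert isinstance(soup.script.string, Script)
assert soup.script.string == 'alert(1)'   # still compares like a str
```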
     def test_pickle_and_unpickle_identity(self):
         # Pickling a tree, then unpickling it, yields a tree identical
         # to the original.
@@ -250,18 +318,21 @@ class HTMLTreeBuilderSmokeTest(object):
         doctype = soup.contents[0]
         self.assertEqual(doctype.__class__, Doctype)
         self.assertEqual(doctype, doctype_fragment)
-        self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
+        self.assertEqual(
+            soup.encode("utf8")[:len(doctype_str)],
+            doctype_str
+        )
 
         # Make sure that the doctype was correctly associated with the
         # parse tree and that the rest of the document parsed.
         self.assertEqual(soup.p.contents[0], 'foo')
 
-    def _document_with_doctype(self, doctype_fragment):
+    def _document_with_doctype(self, doctype_fragment, doctype_string="DOCTYPE"):
         """Generate and parse a document with the given doctype."""
-        doctype = '<!DOCTYPE %s>' % doctype_fragment
+        doctype = '<!%s %s>' % (doctype_string, doctype_fragment)
         markup = doctype + '\n<p>foo</p>'
         soup = self.soup(markup)
-        return doctype, soup
+        return doctype.encode("utf8"), soup

     def test_normal_doctypes(self):
         """Make sure normal, everyday HTML doctypes are handled correctly."""
@@ -274,6 +345,27 @@ class HTMLTreeBuilderSmokeTest(object):
         doctype = soup.contents[0]
         self.assertEqual("", doctype.strip())

+    def test_mixed_case_doctype(self):
+        # A lowercase or mixed-case doctype becomes a Doctype.
+        for doctype_fragment in ("doctype", "DocType"):
+            doctype_str, soup = self._document_with_doctype(
+                "html", doctype_fragment
+            )
+
+            # Make sure a Doctype object was created and that the DOCTYPE
+            # is uppercase.
+            doctype = soup.contents[0]
+            self.assertEqual(doctype.__class__, Doctype)
+            self.assertEqual(doctype, "html")
+            self.assertEqual(
+                soup.encode("utf8")[:len(doctype_str)],
+                b"<!DOCTYPE html>"
+            )
+
+            # Make sure that the doctype was correctly associated with the
+            # parse tree and that the rest of the document parsed.
+            self.assertEqual(soup.p.contents[0], 'foo')
+
     def test_public_doctype_with_url(self):
         doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
         self.assertDoctypeHandled(doctype)
@@ -532,7 +624,7 @@ Hello, world!
         self.assertSoupEquals("�", expect)
         self.assertSoupEquals("�", expect)
         self.assertSoupEquals("�", expect)
-        
+
     def test_multipart_strings(self):
         "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
@@ -594,7 +686,7 @@ Hello, world!
         markup = b'<a class="foo bar">'
         soup = self.soup(markup)
         self.assertEqual(['foo', 'bar'], soup.a['class'])
-        
+
 #
 # Generally speaking, tests below this point are more tests of
 # Beautiful Soup than tests of the tree builders. But parsers are
@@ -779,11 +871,44 @@ Hello, world!
         # encoding.
         self.assertEqual('utf8', charset.encode("utf8"))

+    def test_python_specific_encodings_not_used_in_charset(self):
+        # You can encode an HTML document using a Python-specific
+        # encoding, but that encoding won't be mentioned _inside_ the
+        # resulting document. Instead, the document will appear to
+        # have no encoding.
+        for markup in [
+            b'<meta charset="utf8"></meta>'
+            b'<meta id="encoding" charset="utf8" />'
+        ]:
+            soup = self.soup(markup)
+            for encoding in PYTHON_SPECIFIC_ENCODINGS:
+                if encoding in (
+                    'idna', 'mbcs', 'oem', 'undefined',
+                    'string_escape', 'string-escape'
+                ):
+                    # For one reason or another, these will raise an
+                    # exception if we actually try to use them, so don't
+                    # bother.
+                    continue
+                encoded = soup.encode(encoding)
+                assert b'meta charset=""' in encoded
+                assert encoding.encode("ascii") not in encoded
+
     def test_tag_with_no_attributes_can_have_attributes_added(self):
         data = self.soup("<a>text</a>")
         data.a['foo'] = 'bar'
         self.assertEqual('<a foo="bar">text</a>', data.a.decode())

+    def test_closing_tag_with_no_opening_tag(self):
+        # Without BeautifulSoup.open_tag_counter, the </span> tag will
+        # cause _popToTag to be called over and over again as we look
+        # for a <span> tag that wasn't there. The result is that 'text2'
+        # will show up outside the body of the document.
+        soup = self.soup("<body><div><p>text1</p></span>text2</div></body>
") + self.assertEqual( + "

text1

text2
", soup.body.decode() + ) + def test_worst_case(self): """Test the worst case (currently) for linking issues.""" @@ -791,7 +916,7 @@ Hello, world! self.linkage_validator(soup) -class XMLTreeBuilderSmokeTest(object): +class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): def test_pickle_and_unpickle_identity(self): # Pickling a tree, then unpickling it, yields a tree identical @@ -812,6 +937,25 @@ class XMLTreeBuilderSmokeTest(object): soup = self.soup(markup) self.assertEqual(markup, soup.encode("utf8")) + def test_python_specific_encodings_not_used_in_xml_declaration(self): + # You can encode an XML document using a Python-specific + # encoding, but that encoding won't be mentioned _inside_ the + # resulting document. + markup = b"""\n""" + soup = self.soup(markup) + for encoding in PYTHON_SPECIFIC_ENCODINGS: + if encoding in ( + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' + ): + # For one reason or another, these will raise an + # exception if we actually try to use them, so don't + # bother. + continue + encoded = soup.encode(encoding) + assert b'' in encoded + assert encoding.encode("ascii") not in encoded + def test_processing_instruction(self): markup = b"""\n""" soup = self.soup(markup) @@ -828,7 +972,7 @@ class XMLTreeBuilderSmokeTest(object): soup = self.soup(markup) self.assertEqual( soup.encode("utf-8"), markup) - + def test_nested_namespaces(self): doc = b""" diff --git a/lib/bs4/tests/test_html5lib.py b/lib/bs4/tests/test_html5lib.py index d7a0b298..f8902ad7 100644 --- a/lib/bs4/tests/test_html5lib.py +++ b/lib/bs4/tests/test_html5lib.py @@ -182,3 +182,45 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): soup = self.soup(markup, store_line_numbers=False) self.assertEqual("sourceline", soup.p.sourceline.name) self.assertEqual("sourcepos", soup.p.sourcepos.name) + + def test_special_string_containers(self): + # The html5lib tree builder doesn't support this standard feature, + # because there's no way of knowing, when a string is created, + # where in the tree it will eventually end up. + pass + + def test_html5_attributes(self): + # The html5lib TreeBuilder can convert any entity named in + # the HTML5 spec to a sequence of Unicode characters, and + # convert those Unicode characters to a (potentially + # different) named entity on the way out. + # + # This is a copy of the same test from + # HTMLParserTreeBuilderSmokeTest. It's not in the superclass + # because the lxml HTML TreeBuilder _doesn't_ work this way. + for input_element, output_unicode, output_element in ( + ("⇄", '\u21c4', b'⇄'), + ('⊧', '\u22a7', b'⊧'), + ('𝔑', '\U0001d511', b'𝔑'), + ('≧̸', '\u2267\u0338', b'≧̸'), + ('¬', '\xac', b'¬'), + ('⫬', '\u2aec', b'⫬'), + ('"', '"', b'"'), + ('∴', '\u2234', b'∴'), + ('∴', '\u2234', b'∴'), + ('∴', '\u2234', b'∴'), + ("fj", 'fj', b'fj'), + ("⊔", '\u2294', b'⊔'), + ("⊔︀", '\u2294\ufe00', b'⊔︀'), + ("'", "'", b"'"), + ("|", "|", b"|"), + ): + markup = '
diff --git a/lib/bs4/tests/test_html5lib.py b/lib/bs4/tests/test_html5lib.py
index d7a0b298..f8902ad7 100644
--- a/lib/bs4/tests/test_html5lib.py
+++ b/lib/bs4/tests/test_html5lib.py
@@ -182,3 +182,45 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         soup = self.soup(markup, store_line_numbers=False)
         self.assertEqual("sourceline", soup.p.sourceline.name)
         self.assertEqual("sourcepos", soup.p.sourcepos.name)
+
+    def test_special_string_containers(self):
+        # The html5lib tree builder doesn't support this standard feature,
+        # because there's no way of knowing, when a string is created,
+        # where in the tree it will eventually end up.
+        pass
+
+    def test_html5_attributes(self):
+        # The html5lib TreeBuilder can convert any entity named in
+        # the HTML5 spec to a sequence of Unicode characters, and
+        # convert those Unicode characters to a (potentially
+        # different) named entity on the way out.
+        #
+        # This is a copy of the same test from
+        # HTMLParserTreeBuilderSmokeTest. It's not in the superclass
+        # because the lxml HTML TreeBuilder _doesn't_ work this way.
+        for input_element, output_unicode, output_element in (
+            ("&RightLeftArrows;", '\u21c4', b'&rlarr;'),
+            ('&models;', '\u22a7', b'&models;'),
+            ('&Nfr;', '\U0001d511', b'&Nfr;'),
+            ('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
+            ('&not;', '\xac', b'&not;'),
+            ('&Not;', '\u2aec', b'&Not;'),
+            ('&quot;', '"', b'"'),
+            ('&there4;', '\u2234', b'&there4;'),
+            ('&therefore;', '\u2234', b'&there4;'),
+            ('&Therefore;', '\u2234', b'&there4;'),
+            ("&fjlig;", 'fj', b'fj'),
+            ("&sqcup;", '\u2294', b'&sqcup;'),
+            ("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
+            ("&apos;", "'", b"'"),
+            ("&verbar;", "|", b"|"),
+        ):
+            markup = '<div>%s</div>' % input_element
+            div = self.soup(markup).div
+            without_element = div.encode()
+            expect = b"<div>%s</div>" % output_unicode.encode("utf8")
+            self.assertEqual(without_element, expect)
+
+            with_element = div.encode(formatter="html")
+            expect = b"<div>%s</div>" % output_element
+            self.assertEqual(with_element, expect)
diff --git a/lib/bs4/tests/test_htmlparser.py b/lib/bs4/tests/test_htmlparser.py
index 7be64935..0d8161ef 100644
--- a/lib/bs4/tests/test_htmlparser.py
+++ b/lib/bs4/tests/test_htmlparser.py
@@ -3,6 +3,7 @@ trees."""
 
 from pdb import set_trace
 import pickle
+import warnings
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
 from bs4.builder import HTMLParserTreeBuilder
 from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@@ -51,11 +52,83 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertEqual("sourceline", soup.p.sourceline.name)
         self.assertEqual("sourcepos", soup.p.sourcepos.name)
 
+    def test_on_duplicate_attribute(self):
+        # The html.parser tree builder has a variety of ways of
+        # handling a tag that contains the same attribute multiple times.
+
+        markup = '<a class="cls" href="url1" href="url2" href="url3" id="id">'
+
+        # If you don't provide any particular value for
+        # on_duplicate_attribute, later values replace earlier values.
+        soup = self.soup(markup)
+        self.assertEqual("url3", soup.a['href'])
+        self.assertEqual(["cls"], soup.a['class'])
+        self.assertEqual("id", soup.a['id'])
+
+        # You can also get this behavior explicitly.
+        def assert_attribute(on_duplicate_attribute, expected):
+            soup = self.soup(
+                markup, on_duplicate_attribute=on_duplicate_attribute
+            )
+            self.assertEqual(expected, soup.a['href'])
+
+            # Verify that non-duplicate attributes are treated normally.
+            self.assertEqual(["cls"], soup.a['class'])
+            self.assertEqual("id", soup.a['id'])
+        assert_attribute(None, "url3")
+        assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")
+
+        # You can ignore subsequent values in favor of the first.
+        assert_attribute(BeautifulSoupHTMLParser.IGNORE, "url1")
+
+        # And you can pass in a callable that does whatever you want.
+        def accumulate(attrs, key, value):
+            if not isinstance(attrs[key], list):
+                attrs[key] = [attrs[key]]
+            attrs[key].append(value)
+        assert_attribute(accumulate, ["url1", "url2", "url3"])
+
+    def test_html5_attributes(self):
+        # The html.parser TreeBuilder can convert any entity named in
+        # the HTML5 spec to a sequence of Unicode characters, and
+        # convert those Unicode characters to a (potentially
+        # different) named entity on the way out.
+        for input_element, output_unicode, output_element in (
+            ("&RightLeftArrows;", '\u21c4', b'&rlarr;'),
+            ('&models;', '\u22a7', b'&models;'),
+            ('&Nfr;', '\U0001d511', b'&Nfr;'),
+            ('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
+            ('&not;', '\xac', b'&not;'),
+            ('&Not;', '\u2aec', b'&Not;'),
+            ('&quot;', '"', b'"'),
+            ('&there4;', '\u2234', b'&there4;'),
+            ('&therefore;', '\u2234', b'&there4;'),
+            ('&Therefore;', '\u2234', b'&there4;'),
+            ("&fjlig;", 'fj', b'fj'),
+            ("&sqcup;", '\u2294', b'&sqcup;'),
+            ("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
+            ("&apos;", "'", b"'"),
+            ("&verbar;", "|", b"|"),
+        ):
+            markup = '<div>%s</div>' % input_element
+            div = self.soup(markup).div
+            without_element = div.encode()
+            expect = b"<div>%s</div>" % output_unicode.encode("utf8")
+            self.assertEqual(without_element, expect)
+
+            with_element = div.encode(formatter="html")
+            expect = b"<div>%s</div>" % output_element
+            self.assertEqual(with_element, expect)
+
 
 class TestHTMLParserSubclass(SoupTest):
     def test_error(self):
         """Verify that our HTMLParser subclass implements error() in a way
         that doesn't cause a crash.
         """
         parser = BeautifulSoupHTMLParser()
-        parser.error("don't crash")
+        with warnings.catch_warnings(record=True) as warns:
+            parser.error("don't crash")
+        [warning] = warns
+        assert "don't crash" == str(warning.message)
diff --git a/lib/bs4/tests/test_lxml.py b/lib/bs4/tests/test_lxml.py
index 3d0c75fa..71931ffe 100644
--- a/lib/bs4/tests/test_lxml.py
+++ b/lib/bs4/tests/test_lxml.py
@@ -45,7 +45,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
             "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
         self.assertSoupEquals(
             "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
-    
+
     def test_entities_in_foreign_document_encoding(self):
         # We can't implement this case correctly because by the time we
         # hear about markup like "&#147;", it's been (incorrectly) converted into
diff --git a/lib/bs4/tests/test_soup.py b/lib/bs4/tests/test_soup.py
index 0e7dac11..4d00845d 100644
--- a/lib/bs4/tests/test_soup.py
+++ b/lib/bs4/tests/test_soup.py
@@ -3,6 +3,7 @@
 
 from pdb import set_trace
 import logging
+import os
 import unittest
 import sys
 import tempfile
@@ -10,6 +11,8 @@ import tempfile
 from bs4 import (
     BeautifulSoup,
     BeautifulStoneSoup,
+    GuessedAtParserWarning,
+    MarkupResemblesLocatorWarning,
 )
 from bs4.builder import (
     TreeBuilder,
@@ -29,7 +32,6 @@ import bs4.dammit
 from bs4.dammit import (
     EntitySubstitution,
     UnicodeDammit,
-    EncodingDetector,
 )
 from bs4.testing import (
     default_builder,
@@ -73,6 +75,7 @@ class TestConstructor(SoupTest):
             self.store_line_numbers = False
             self.cdata_list_attributes = []
             self.preserve_whitespace_tags = []
+            self.string_containers = {}
         def initialize_soup(self, soup):
             pass
         def feed(self, markup):
@@ -186,28 +189,69 @@ class TestConstructor(SoupTest):
             isinstance(x, (TagPlus, StringPlus, CommentPlus))
             for x in soup.recursiveChildGenerator()
         )
+
+    def test_alternate_string_containers(self):
+        # Test the ability to customize the string containers for
+        # different types of tags.
+        class PString(NavigableString):
+            pass
+
+        class BString(NavigableString):
+            pass
+
+        soup = self.soup(
+            "<div>Hello.<p>Here is <b>some <i>bolded</i></b> text",
+            string_containers = {
+                'b': BString,
+                'p': PString,
+            }
+        )
+
+        # The string before the <p> tag is a regular NavigableString.
+        assert isinstance(soup.div.contents[0], NavigableString)
+
+        # The string inside the <p> tag, but not inside the <b> tag,
+        # is a PString.
+        assert isinstance(soup.p.contents[0], PString)
+
+        # Every string inside the <b> tag is a BString, even the one that
+        # was also inside an <i> tag.
+        for s in soup.b.strings:
+            assert isinstance(s, BString)
+
+        # Now that parsing was complete, the string_container_stack
+        # (where this information was kept) has been cleared out.
+        self.assertEqual([], soup.string_container_stack)
+
+
 class TestWarnings(SoupTest):
 
-    def _no_parser_specified(self, s, is_there=True):
-        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
-        self.assertTrue(v)
+    def _assert_warning(self, warnings, cls):
+        for w in warnings:
+            if isinstance(w.message, cls):
+                return w
+        raise Exception("%s warning not found in %r" % (cls, warnings))
+
+    def _assert_no_parser_specified(self, w):
+        warning = self._assert_warning(w, GuessedAtParserWarning)
+        message = str(warning.message)
+        self.assertTrue(
+            message.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:60])
+        )
 
     def test_warning_if_no_parser_specified(self):
         with warnings.catch_warnings(record=True) as w:
-            soup = self.soup("<a><b></b></a>")
-        msg = str(w[0].message)
-        self._assert_no_parser_specified(msg)
+            soup = BeautifulSoup("<a><b></b></a>")
+        self._assert_no_parser_specified(w)
 
     def test_warning_if_parser_specified_too_vague(self):
         with warnings.catch_warnings(record=True) as w:
-            soup = self.soup("<a><b></b></a>", "html")
-        msg = str(w[0].message)
-        self._assert_no_parser_specified(msg)
+            soup = BeautifulSoup("<a><b></b></a>", "html")
+        self._assert_no_parser_specified(w)
 
     def test_no_warning_if_explicit_parser_specified(self):
         with warnings.catch_warnings(record=True) as w:
-            soup = self.soup("<a><b></b></a>", "html.parser")
+            soup = BeautifulSoup("<a><b></b></a>", "html.parser")
         self.assertEqual([], w)
 
     def test_parseOnlyThese_renamed_to_parse_only(self):
@@ -231,41 +275,58 @@ class TestWarnings(SoupTest):
         self.assertRaises(
             TypeError, self.soup, "<a>", no_such_argument=True)
 
-class TestWarnings(SoupTest):
-
     def test_disk_file_warning(self):
         filehandle = tempfile.NamedTemporaryFile()
         filename = filehandle.name
         try:
             with warnings.catch_warnings(record=True) as w:
                 soup = self.soup(filename)
-            msg = str(w[0].message)
-            self.assertTrue("looks like a filename" in msg)
+            warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
+            self.assertTrue("looks like a filename" in str(warning.message))
         finally:
             filehandle.close()
 
         # The file no longer exists, so Beautiful Soup will no longer issue the warning.
         with warnings.catch_warnings(record=True) as w:
             soup = self.soup(filename)
-        self.assertEqual(0, len(w))
+        self.assertEqual([], w)
 
+    def test_directory_warning(self):
+        try:
+            filename = tempfile.mkdtemp()
+            with warnings.catch_warnings(record=True) as w:
+                soup = self.soup(filename)
+            warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
+            self.assertTrue("looks like a directory" in str(warning.message))
+        finally:
+            os.rmdir(filename)
+
+        # The directory no longer exists, so Beautiful Soup will no longer issue the warning.
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(filename)
+        self.assertEqual([], w)
+
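Because these heuristics still fire on legitimate one-word markup, the dedicated warning classes make them easy to silence without matching on message text. A sketch, assuming bs4 4.10+ (the URL is a placeholder):

    import warnings
    from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
        # Would normally warn that the markup looks like a URL.
        soup = BeautifulSoup('http://www.example.com/', 'html.parser')

GuessedAtParserWarning can be filtered the same way, though passing an explicit parser is the better fix.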
     def test_url_warning_with_bytes_url(self):
         with warnings.catch_warnings(record=True) as warning_list:
             soup = self.soup(b"http://www.crummybytes.com/")
-        # Be aware this isn't the only warning that can be raised during
-        # execution..
-        self.assertTrue(any("looks like a URL" in str(w.message)
-            for w in warning_list))
+        warning = self._assert_warning(
+            warning_list, MarkupResemblesLocatorWarning
+        )
+        self.assertTrue("looks like a URL" in str(warning.message))
 
     def test_url_warning_with_unicode_url(self):
         with warnings.catch_warnings(record=True) as warning_list:
             # note - this url must differ from the bytes one otherwise
             # python's warnings system swallows the second warning
             soup = self.soup("http://www.crummyunicode.com/")
-        self.assertTrue(any("looks like a URL" in str(w.message)
-            for w in warning_list))
+        warning = self._assert_warning(
+            warning_list, MarkupResemblesLocatorWarning
+        )
+        self.assertTrue("looks like a URL" in str(warning.message))
 
     def test_url_warning_with_bytes_and_space(self):
+        # Here the markup contains something besides a URL, so no warning
+        # is issued.
         with warnings.catch_warnings(record=True) as warning_list:
             soup = self.soup(b"http://www.crummybytes.com/ is great")
         self.assertFalse(any("looks like a URL" in str(w.message)
@@ -307,6 +368,51 @@ class TestEntitySubstitution(unittest.TestCase):
         self.assertEqual(self.sub.substitute_html(dammit.markup),
                          "&lsquo;&rsquo;foo&ldquo;&rdquo;")
 
+    def test_html5_entity(self):
+        # Some HTML5 entities correspond to single- or multi-character
+        # Unicode sequences.
+
+        for entity, u in (
+            # A few spot checks of our ability to recognize
+            # special character sequences and convert them
+            # to named entities.
+            ('&models;', '\u22a7'),
+            ('&Nfr;', '\U0001d511'),
+            ('&ngeqq;', '\u2267\u0338'),
+            ('&not;', '\xac'),
+            ('&Not;', '\u2aec'),
+
+            # We _could_ convert | to &verbar;, but we don't, because
+            # | is an ASCII character.
+            ('|', '|'),
+
+            # Similarly for the fj ligature, which we could convert to
+            # &fjlig;, but we don't.
+            ("fj", "fj"),
+
+            # We do convert _these_ ASCII characters to HTML entities,
+            # because that's required to generate valid HTML.
+            ('&gt;', '>'),
+            ('&lt;', '<'),
+            ('&amp;', '&'),
+        ):
+            template = '3 %s 4'
+            raw = template % u
+            with_entities = template % entity
+            self.assertEqual(self.sub.substitute_html(raw), with_entities)
+
+    def test_html5_entity_with_variation_selector(self):
+        # Some HTML5 entities correspond either to a single-character
+        # Unicode sequence _or_ to the same character plus U+FE00,
+        # VARIATION SELECTOR 1. We can handle this.
+        data = "fjords \u2294 penguins"
+        markup = "fjords &sqcup; penguins"
+        self.assertEqual(self.sub.substitute_html(data), markup)
+
+        data = "fjords \u2294\ufe00 penguins"
+        markup = "fjords &sqcups; penguins"
+        self.assertEqual(self.sub.substitute_html(data), markup)
+
     def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
         s = 'Welcome to "my bar"'
         self.assertEqual(self.sub.substitute_xml(s, False), s)
@@ -416,235 +522,26 @@ class TestEncodingConversion(SoupTest):
         markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
         self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
 
-class TestUnicodeDammit(unittest.TestCase):
-    """Standalone tests of UnicodeDammit."""
-
-    def test_unicode_input(self):
-        markup = "I'm already Unicode! \N{SNOWMAN}"
-        dammit = UnicodeDammit(markup)
-        self.assertEqual(dammit.unicode_markup, markup)
-
-    def test_smart_quotes_to_unicode(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup)
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")
-
-    def test_smart_quotes_to_xml_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
-
-    def test_smart_quotes_to_html_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="html")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
-
-    def test_smart_quotes_to_ascii(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
-        self.assertEqual(
-            dammit.unicode_markup, """<foo>''""</foo>""")
-
-    def test_detect_utf8(self):
-        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
-        dammit = UnicodeDammit(utf8)
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-        self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')
-
-    def test_convert_hebrew(self):
-        hebrew = b"\xed\xe5\xec\xf9"
-        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
-        self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')
-
-    def test_dont_see_smart_quotes_where_there_are_none(self):
-        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-        dammit = UnicodeDammit(utf_8)
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
-
-    def test_ignore_inappropriate_codecs(self):
-        utf8_data = "Räksmörgås".encode("utf-8")
-        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-
-    def test_ignore_invalid_codecs(self):
-        utf8_data = "Räksmörgås".encode("utf-8")
-        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
-            dammit = UnicodeDammit(utf8_data, [bad_encoding])
-            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-
-    def test_exclude_encodings(self):
-        # This is UTF-8.
-        utf8_data = "Räksmörgås".encode("utf-8")
-
-        # But if we exclude UTF-8 from consideration, the guess is
-        # Windows-1252.
-        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
-
-        # And if we exclude that, there is no valid guess at all.
-        dammit = UnicodeDammit(
-            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
-        self.assertEqual(dammit.original_encoding, None)
-
-    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
-        detected = EncodingDetector(
-            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
-        encodings = list(detected.encodings)
-        assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings
-
-    def test_detect_html5_style_meta_tag(self):
-
-        for data in (
-            b'<html><meta charset="euc-jp" /></html>',
-            b"<html><meta charset='euc-jp' /></html>",
-            b"<html><meta charset=euc-jp /></html>",
-            b"<html><meta charset=euc-jp/></html>"):
-            dammit = UnicodeDammit(data, is_html=True)
-            self.assertEqual(
-                "euc-jp", dammit.original_encoding)
-
-    def test_last_ditch_entity_replacement(self):
-        # This is a UTF-8 document that contains bytestrings
-        # completely incompatible with UTF-8 (ie. encoded with some other
-        # encoding).
-        #
-        # Since there is no consistent encoding for the document,
-        # Unicode, Dammit will eventually encode the document as UTF-8
-        # and encode the incompatible characters as REPLACEMENT
-        # CHARACTER.
-        #
-        # If chardet is installed, it will detect that the document
-        # can be converted into ISO-8859-1 without errors. This happens
-        # to be the wrong encoding, but it is a consistent encoding, so the
-        # code we're testing here won't run.
-        #
-        # So we temporarily disable chardet if it's present.
-        doc = b"""\357\273\277<?xml version="1.0" encoding="utf-8"?>
-<html><b>\330\250\330\252\330\261</b>
-<i>\310\322\321\220\312\321\355\344</i></html>"""
-        chardet = bs4.dammit.chardet_dammit
-        logging.disable(logging.WARNING)
-        try:
-            def noop(str):
-                return None
-            bs4.dammit.chardet_dammit = noop
-            dammit = UnicodeDammit(doc)
-            self.assertEqual(True, dammit.contains_replacement_characters)
-            self.assertTrue("\ufffd" in dammit.unicode_markup)
-
-            soup = BeautifulSoup(doc, "html.parser")
-            self.assertTrue(soup.contains_replacement_characters)
-        finally:
-            logging.disable(logging.NOTSET)
-            bs4.dammit.chardet_dammit = chardet
-
-    def test_byte_order_mark_removed(self):
-        # A document written in UTF-16LE will have its byte order marker stripped.
-        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
-        dammit = UnicodeDammit(data)
-        self.assertEqual("<a>áé</a>", dammit.unicode_markup)
-        self.assertEqual("utf-16le", dammit.original_encoding)
-
-    def test_detwingle(self):
-        # Here's a UTF8 document.
-        utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")
-
-        # Here's a Windows-1252 document.
-        windows_1252 = (
-            "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
-            "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
-
-        # Through some unholy alchemy, they've been stuck together.
-        doc = utf8 + windows_1252 + utf8
-
-        # The document can't be turned into UTF-8:
-        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
-
-        # Unicode, Dammit thinks the whole document is Windows-1252,
-        # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"
-
-        # But if we run it through fix_embedded_windows_1252, it's fixed:
-        fixed = UnicodeDammit.detwingle(doc)
-        self.assertEqual(
-            "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
-
-    def test_detwingle_ignores_multibyte_characters(self):
-        # Each of these characters has a UTF-8 representation ending
-        # in \x93. \x93 is a smart quote if interpreted as
-        # Windows-1252. But our code knows to skip over multibyte
-        # UTF-8 characters, so they'll survive the process unscathed.
-        for tricky_unicode_char in (
-            "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
-            "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
-            "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
-        ):
-            input = tricky_unicode_char.encode("utf8")
-            self.assertTrue(input.endswith(b'\x93'))
-            output = UnicodeDammit.detwingle(input)
-            self.assertEqual(output, input)
-
-    def test_find_declared_encoding(self):
-        # Test our ability to find a declared encoding inside an
-        # XML or HTML document.
-        #
-        # Even if the document comes in as Unicode, it may be
-        # interesting to know what encoding was claimed
-        # originally.
-
-        html_unicode = '<html><head><meta charset="utf-8"></head></html>'
-        html_bytes = html_unicode.encode("ascii")
-
-        xml_unicode= '<?xml version="1.0" encoding="iso-8859-1"?>'
-        xml_bytes = xml_unicode.encode("ascii")
-
-        m = EncodingDetector.find_declared_encoding
-        self.assertEqual(None, m(html_unicode, is_html=False))
-        self.assertEqual("utf-8", m(html_unicode, is_html=True))
-        self.assertEqual("utf-8", m(html_bytes, is_html=True))
-
-        self.assertEqual("iso-8859-1", m(xml_unicode))
-        self.assertEqual("iso-8859-1", m(xml_bytes))
-
-        # Normally, only the first few kilobytes of a document are checked for
-        # an encoding.
-        spacer = b' ' * 5000
-        self.assertEqual(None, m(spacer + html_bytes))
-        self.assertEqual(None, m(spacer + xml_bytes))
-
-        # But you can tell find_declared_encoding to search an entire
-        # HTML document.
-        self.assertEqual(
-            "utf-8",
-            m(spacer + html_bytes, is_html=True, search_entire_document=True)
-        )
-
-        # The XML encoding declaration has to be the very first thing
-        # in the document. We'll allow whitespace before the document
-        # starts, but nothing else.
-        self.assertEqual(
-            "iso-8859-1",
-            m(xml_bytes, search_entire_document=True)
-        )
-        self.assertEqual(
-            None, m(b'a' + xml_bytes, search_entire_document=True)
-        )
-
 class TestNamedspacedAttribute(SoupTest):
 
     def test_name_may_be_none_or_missing(self):
         a = NamespacedAttribute("xmlns", None)
         self.assertEqual(a, "xmlns")
 
+        a = NamespacedAttribute("xmlns", "")
+        self.assertEqual(a, "xmlns")
+
         a = NamespacedAttribute("xmlns")
         self.assertEqual(a, "xmlns")
 
+    def test_namespace_may_be_none_or_missing(self):
+        a = NamespacedAttribute(None, "tag")
+        self.assertEqual(a, "tag")
+
+        a = NamespacedAttribute("", "tag")
+        self.assertEqual(a, "tag")
+
     def test_attribute_is_equivalent_to_colon_separated_string(self):
         a = NamespacedAttribute("a", "b")
         self.assertEqual("a:b", a)
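A sketch of the substitute_html behavior the new entity-substitution tests pin down, assuming bs4 4.10+ (earlier versions cannot map a multi-character sequence onto a single named entity):

    from bs4.dammit import EntitySubstitution

    # Two code points collapse to one entity.
    assert EntitySubstitution.substitute_html('\u2267\u0338') == '&ngeqq;'
    # Plain ASCII is left alone...
    assert EntitySubstitution.substitute_html('fj') == 'fj'
    # ...except the characters required for valid HTML.
    assert EntitySubstitution.substitute_html('a & b') == 'a &amp; b'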
diff --git a/lib/bs4/tests/test_tree.py b/lib/bs4/tests/test_tree.py
index e69afdf9..59b51d0b 100644
--- a/lib/bs4/tests/test_tree.py
+++ b/lib/bs4/tests/test_tree.py
@@ -27,13 +27,17 @@ from bs4.element import (
     Doctype,
     Formatter,
     NavigableString,
+    Script,
     SoupStrainer,
+    Stylesheet,
     Tag,
+    TemplateString,
 )
 from bs4.testing import (
     SoupTest,
     skipIf,
 )
+from soupsieve import SelectorSyntaxError
 
 XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
 LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
@@ -1005,6 +1009,15 @@ class TestTreeModification(SoupTest):
         soup.a.extend(l)
         self.assertEqual("<a><g></g><f></f><e></e><d></d><c></c><b></b></a>", soup.decode())
 
+    def test_extend_with_another_tags_contents(self):
+        data = '<body><div id="d1"><a>1</a><a>2</a><a>3</a><a>4</a></div><div id="d2"></div></body>'
+        soup = self.soup(data)
+        d1 = soup.find('div', id='d1')
+        d2 = soup.find('div', id='d2')
+        d2.extend(d1)
+        self.assertEqual('<div id="d1"></div>', d1.decode())
+        self.assertEqual('<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())
+
     def test_move_tag_to_beginning_of_parent(self):
         data = "<a><b></b><c></c><d></d></a>"
         soup = self.soup(data)
@@ -1117,6 +1130,37 @@ class TestTreeModification(SoupTest):
         self.assertEqual(no.next_element, "no")
         self.assertEqual(no.next_sibling, " business")
 
+    def test_replace_with_errors(self):
+        # Can't replace a tag that's not part of a tree.
+        a_tag = Tag(name="a")
+        self.assertRaises(ValueError, a_tag.replace_with, "won't work")
+
+        # Can't replace a tag with its parent.
+        a_tag = self.soup("<a><b></b></a>").a
+        self.assertRaises(ValueError, a_tag.b.replace_with, a_tag)
+
+        # Or with a list that includes its parent.
+        self.assertRaises(ValueError, a_tag.b.replace_with,
+                          "string1", a_tag, "string2")
+
+    def test_replace_with_multiple(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        d_tag = soup.new_tag("d")
+        d_tag.string = "Text In D Tag"
+        e_tag = soup.new_tag("e")
+        f_tag = soup.new_tag("f")
+        a_string = "Random Text"
+        soup.c.replace_with(d_tag, e_tag, a_string, f_tag)
+        self.assertEqual(
+            "<a><b></b><d>Text In D Tag</d><e></e>Random Text<f></f></a>",
+            soup.decode()
+        )
+        assert soup.b.next_element == d_tag
+        assert d_tag.string.next_element == e_tag
+        assert e_tag.next_element.string == a_string
+        assert e_tag.next_element.next_element == f_tag
+
     def test_replace_first_child(self):
         data = "<a><b></b><c></c></a>"
         soup = self.soup(data)
@@ -1275,6 +1319,23 @@ class TestTreeModification(SoupTest):
         a.clear(decompose=True)
         self.assertEqual(0, len(em.contents))
 
+
+    def test_decompose(self):
+        # Test PageElement.decompose() and PageElement.decomposed
+        soup = self.soup("<p><a>String <em>Italicized</em></a></p><p>Another para</p>")
+        p1, p2 = soup.find_all('p')
+        a = p1.a
+        text = p1.em.string
+        for i in [p1, p2, a, text]:
+            self.assertEqual(False, i.decomposed)
+
+        # This sets p1 and everything beneath it to decomposed.
+        p1.decompose()
+        for i in [p1, a, text]:
+            self.assertEqual(True, i.decomposed)
+
+        # p2 is unaffected.
+        self.assertEqual(False, p2.decomposed)
+
     def test_string_set(self):
         """Tag.string = 'string'"""
         soup = self.soup("<a></a> <b></b>")
@@ -1391,7 +1452,7 @@ class TestElementObjects(SoupTest):
         self.assertEqual(soup.a.get_text(","), "a,r, , t ")
         self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
 
-    def test_get_text_ignores_comments(self):
+    def test_get_text_ignores_special_string_containers(self):
         soup = self.soup("foo<!--IGNORE-->bar")
         self.assertEqual(soup.get_text(), "foobar")
 
@@ -1400,10 +1461,51 @@ class TestElementObjects(SoupTest):
         self.assertEqual(
             soup.get_text(types=None), "fooIGNOREbar")
 
-    def test_all_strings_ignores_comments(self):
+        soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar")
+        self.assertEqual(soup.get_text(), "foobar")
+
+    def test_all_strings_ignores_special_string_containers(self):
         soup = self.soup("foo<!--IGNORE-->bar")
         self.assertEqual(['foo', 'bar'], list(soup.strings))
 
+        soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar")
+        self.assertEqual(['foo', 'bar'], list(soup.strings))
+
") + + self.assertEqual(style.div.get_text(), "a") + self.assertEqual(list(style.div.strings), ["a"]) + self.assertEqual(style.div.style.get_text(), "Some CSS") + self.assertEqual(list(style.div.style.strings), + ['Some CSS']) + + # The comment is not picked up here. That's because it was + # parsed into a Comment object, which is not considered + # interesting by template.strings. + self.assertEqual(template.div.get_text(), "a") + self.assertEqual(list(template.div.strings), ["a"]) + self.assertEqual(template.div.template.get_text(), "Templated text.") + self.assertEqual(list(template.div.template.strings), + ["Templated ", "text", "."]) + + # The comment is included here, because it didn't get parsed + # into a Comment object--it's part of the Script string. + self.assertEqual(script.div.get_text(), "a") + self.assertEqual(list(script.div.strings), ["a"]) + self.assertEqual(script.div.script.get_text(), + "Some text") + self.assertEqual(list(script.div.script.strings), + ['Some text']) + class TestCDAtaListAttributes(SoupTest): """Testing cdata-list attributes like 'class'. @@ -1775,71 +1877,7 @@ class TestEncoding(SoupTest): else: self.assertEqual(b'\\u2603', repr(soup)) -class TestFormatter(SoupTest): - - def test_sort_attributes(self): - # Test the ability to override Formatter.attributes() to, - # e.g., disable the normal sorting of attributes. - class UnsortedFormatter(Formatter): - def attributes(self, tag): - self.called_with = tag - for k, v in sorted(tag.attrs.items()): - if k == 'ignore': - continue - yield k,v - - soup = self.soup('

') - formatter = UnsortedFormatter() - decoded = soup.decode(formatter=formatter) - - # attributes() was called on the

tag. It filtered out one - # attribute and sorted the other two. - self.assertEqual(formatter.called_with, soup.p) - self.assertEqual('

', decoded) - - -class TestNavigableStringSubclasses(SoupTest): - - def test_cdata(self): - # None of the current builders turn CDATA sections into CData - # objects, but you can create them manually. - soup = self.soup("") - cdata = CData("foo") - soup.insert(1, cdata) - self.assertEqual(str(soup), "") - self.assertEqual(soup.find(text="foo"), "foo") - self.assertEqual(soup.contents[0], "foo") - - def test_cdata_is_never_formatted(self): - """Text inside a CData object is passed into the formatter. - - But the return value is ignored. - """ - - self.count = 0 - def increment(*args): - self.count += 1 - return "BITTER FAILURE" - - soup = self.soup("") - cdata = CData("<><><>") - soup.insert(1, cdata) - self.assertEqual( - b"<><>]]>", soup.encode(formatter=increment)) - self.assertEqual(1, self.count) - - def test_doctype_ends_in_newline(self): - # Unlike other NavigableString subclasses, a DOCTYPE always ends - # in a newline. - doctype = Doctype("foo") - soup = self.soup("") - soup.insert(1, doctype) - self.assertEqual(soup.encode(), b"\n") - - def test_declaration(self): - d = Declaration("foo") - self.assertEqual("", d.output_ready()) - + class TestSoupSelector(TreeTest): HTML = """ @@ -1949,7 +1987,7 @@ class TestSoupSelector(TreeTest): self.assertEqual(len(self.soup.select('del')), 0) def test_invalid_tag(self): - self.assertRaises(SyntaxError, self.soup.select, 'tag%t') + self.assertRaises(SelectorSyntaxError, self.soup.select, 'tag%t') def test_select_dashed_tag_ids(self): self.assertSelects('custom-dashed-tag', ['dash1', 'dash2']) @@ -2140,7 +2178,7 @@ class TestSoupSelector(TreeTest): NotImplementedError, self.soup.select, "a:no-such-pseudoclass") self.assertRaises( - SyntaxError, self.soup.select, "a:nth-of-type(a)") + SelectorSyntaxError, self.soup.select, "a:nth-of-type(a)") def test_nth_of_type(self): # Try to select first paragraph @@ -2196,7 +2234,7 @@ class TestSoupSelector(TreeTest): self.assertEqual([], self.soup.select('#inner ~ h2')) def test_dangling_combinator(self): - self.assertRaises(SyntaxError, self.soup.select, 'h1 >') + self.assertRaises(SelectorSyntaxError, self.soup.select, 'h1 >') def test_sibling_combinator_wont_select_same_tag_twice(self): self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr']) @@ -2227,8 +2265,8 @@ class TestSoupSelector(TreeTest): self.assertSelects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']) def test_invalid_multiple_select(self): - self.assertRaises(SyntaxError, self.soup.select, ',x, y') - self.assertRaises(SyntaxError, self.soup.select, 'x,,y') + self.assertRaises(SelectorSyntaxError, self.soup.select, ',x, y') + self.assertRaises(SelectorSyntaxError, self.soup.select, 'x,,y') def test_multiple_select_attrs(self): self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
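A sketch of the failure mode behind the SyntaxError to SelectorSyntaxError changes above, assuming soupsieve is installed (it is the selector backend these tests now import from):

    from bs4 import BeautifulSoup
    from soupsieve import SelectorSyntaxError

    soup = BeautifulSoup('<p>x</p>', 'html.parser')
    try:
        soup.select('h1 >')  # dangling combinator, as in the test
    except SelectorSyntaxError as e:
        print('rejected:', e)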