An H1
-Some text
-Some more text
-An H2
-Another
-Bob -Another H2
-me - -span1a1 -span1a2 test - -span2a1 - - -English
-English UK
-English US
-French
-diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py index db71cc7c..3d2ab09a 100644 --- a/lib/bs4/__init__.py +++ b/lib/bs4/__init__.py @@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.11.2" +__version__ = "4.12.2" __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson" # Use of this source code is governed by the MIT license. __license__ = "MIT" @@ -38,11 +38,13 @@ from .builder import ( builder_registry, ParserRejectedMarkup, XMLParsedAsHTMLWarning, + HTMLParserTreeBuilder ) from .dammit import UnicodeDammit from .element import ( CData, Comment, + CSS, DEFAULT_OUTPUT_ENCODING, Declaration, Doctype, @@ -116,7 +118,7 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" - + def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, element_classes=None, **kwargs): @@ -348,25 +350,49 @@ class BeautifulSoup(Tag): self.markup = None self.builder.soup = None - def __copy__(self): - """Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" - copy = type(self)( - self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' - ) + def _clone(self): + """Create a new BeautifulSoup object with the same TreeBuilder, + but not associated with any markup. - # Although we encoded the tree to UTF-8, that may not have - # been the encoding of the original markup. Set the copy's - # .original_encoding to reflect the original object's - # .original_encoding. - copy.original_encoding = self.original_encoding - return copy + This is the first step of the deepcopy process. + """ + clone = type(self)("", None, self.builder) + # Keep track of the encoding of the original document, + # since we won't be parsing it again. + clone.original_encoding = self.original_encoding + return clone + def __getstate__(self): # Frequently a tree builder can't be pickled. d = dict(self.__dict__) if 'builder' in d and d['builder'] is not None and not self.builder.picklable: - d['builder'] = None + d['builder'] = type(self.builder) + # Store the contents as a Unicode string. + d['contents'] = [] + d['markup'] = self.decode() + + # If _most_recent_element is present, it's a Tag object left + # over from initial parse. It might not be picklable and we + # don't need it. + if '_most_recent_element' in d: + del d['_most_recent_element'] return d + + def __setstate__(self, state): + # If necessary, restore the TreeBuilder by looking it up. + self.__dict__ = state + if isinstance(self.builder, type): + self.builder = self.builder() + elif not self.builder: + # We don't know which builder was used to build this + # parse tree, so use a default we know is always available. + self.builder = HTMLParserTreeBuilder() + self.builder.soup = self + self.reset() + self._feed() + return state + @classmethod def _decode_markup(cls, markup): @@ -468,6 +494,7 @@ class BeautifulSoup(Tag): self.open_tag_counter = Counter() self.preserve_whitespace_tag_stack = [] self.string_container_stack = [] + self._most_recent_element = None self.pushTag(self) def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, @@ -749,7 +776,7 @@ class BeautifulSoup(Tag): def decode(self, pretty_print=False, eventual_encoding=DEFAULT_OUTPUT_ENCODING, - formatter="minimal"): + formatter="minimal", iterator=None): """Returns a string or Unicode representation of the parse tree as an HTML or XML document. @@ -776,7 +803,7 @@ class BeautifulSoup(Tag): else: indent_level = 0 return prefix + super(BeautifulSoup, self).decode( - indent_level, eventual_encoding, formatter) + indent_level, eventual_encoding, formatter, iterator) # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' _s = BeautifulSoup diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py index e48b6a0e..e065096b 100644 --- a/lib/bs4/builder/_htmlparser.py +++ b/lib/bs4/builder/_htmlparser.py @@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit from bs4.builder import ( DetectsXMLParsedAsHTML, + ParserRejectedMarkup, HTML, HTMLTreeBuilder, STRICT, @@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML): self._initialize_xml_detector() + def error(self, message): + # NOTE: This method is required so long as Python 3.9 is + # supported. The corresponding code is removed from HTMLParser + # in 3.5, but not removed from ParserBase until 3.10. + # https://github.com/python/cpython/issues/76025 + # + # The original implementation turned the error into a warning, + # but in every case I discovered, this made HTMLParser + # immediately crash with an error message that was less + # helpful than the warning. The new implementation makes it + # more clear that html.parser just can't parse this + # markup. The 3.10 implementation does the same, though it + # raises AssertionError rather than calling a method. (We + # catch this error and wrap it in a ParserRejectedMarkup.) + raise ParserRejectedMarkup(message) + def handle_startendtag(self, name, attrs): """Handle an incoming empty-element tag. @@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup - parser.feed(markup) + try: + parser.feed(markup) + except AssertionError as e: + # html.parser raises AssertionError in rare cases to + # indicate a fatal problem with the markup, especially + # when there's an error in the doctype declaration. + raise ParserRejectedMarkup(e) parser.close() parser.already_closed_empty_element = [] diff --git a/lib/bs4/css.py b/lib/bs4/css.py new file mode 100644 index 00000000..245ac601 --- /dev/null +++ b/lib/bs4/css.py @@ -0,0 +1,280 @@ +"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" + +import warnings +try: + import soupsieve +except ImportError as e: + soupsieve = None + warnings.warn( + 'The soupsieve package is not installed. CSS selectors cannot be used.' + ) + + +class CSS(object): + """A proxy object against the soupsieve library, to simplify its + CSS selector API. + + Acquire this object through the .css attribute on the + BeautifulSoup object, or on the Tag you want to use as the + starting point for a CSS selector. + + The main advantage of doing this is that the tag to be selected + against doesn't need to be explicitly specified in the function + calls, since it's already scoped to a tag. + """ + + def __init__(self, tag, api=soupsieve): + """Constructor. + + You don't need to instantiate this class yourself; instead, + access the .css attribute on the BeautifulSoup object, or on + the Tag you want to use as the starting point for your CSS + selector. + + :param tag: All CSS selectors will use this as their starting + point. + + :param api: A plug-in replacement for the soupsieve module, + designed mainly for use in tests. + """ + if api is None: + raise NotImplementedError( + "Cannot execute CSS selectors because the soupsieve package is not installed." + ) + self.api = api + self.tag = tag + + def escape(self, ident): + """Escape a CSS identifier. + + This is a simple wrapper around soupselect.escape(). See the + documentation for that function for more information. + """ + if soupsieve is None: + raise NotImplementedError( + "Cannot escape CSS identifiers because the soupsieve package is not installed." + ) + return self.api.escape(ident) + + def _ns(self, ns, select): + """Normalize a dictionary of namespaces.""" + if not isinstance(select, self.api.SoupSieve) and ns is None: + # If the selector is a precompiled pattern, it already has + # a namespace context compiled in, which cannot be + # replaced. + ns = self.tag._namespaces + return ns + + def _rs(self, results): + """Normalize a list of results to a Resultset. + + A ResultSet is more consistent with the rest of Beautiful + Soup's API, and ResultSet.__getattr__ has a helpful error + message if you try to treat a list of results as a single + result (a common mistake). + """ + # Import here to avoid circular import + from bs4.element import ResultSet + return ResultSet(None, results) + + def compile(self, select, namespaces=None, flags=0, **kwargs): + """Pre-compile a selector and return the compiled object. + + :param selector: A CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will use the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.compile() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.compile() method. + + :return: A precompiled selector object. + :rtype: soupsieve.SoupSieve + """ + return self.api.compile( + select, self._ns(namespaces, select), flags, **kwargs + ) + + def select_one(self, select, namespaces=None, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag and return the + first result. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.select_one() + method. + + :param selector: A CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will use the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.select_one() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.select_one() method. + + :return: A Tag, or None if the selector has no match. + :rtype: bs4.element.Tag + + """ + return self.api.select_one( + select, self.tag, self._ns(namespaces, select), flags, **kwargs + ) + + def select(self, select, namespaces=None, limit=0, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.select() + method. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param limit: After finding this number of results, stop looking. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.select() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.select() method. + + :return: A ResultSet of Tag objects. + :rtype: bs4.element.ResultSet + + """ + if limit is None: + limit = 0 + + return self._rs( + self.api.select( + select, self.tag, self._ns(namespaces, select), limit, flags, + **kwargs + ) + ) + + def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.iselect() + method. It is the same as select(), but it returns a generator + instead of a list. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param limit: After finding this number of results, stop looking. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.iselect() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.iselect() method. + + :return: A generator + :rtype: types.GeneratorType + """ + return self.api.iselect( + select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs + ) + + def closest(self, select, namespaces=None, flags=0, **kwargs): + """Find the Tag closest to this one that matches the given selector. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.closest() + method. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.closest() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.closest() method. + + :return: A Tag, or None if there is no match. + :rtype: bs4.Tag + + """ + return self.api.closest( + select, self.tag, self._ns(namespaces, select), flags, **kwargs + ) + + def match(self, select, namespaces=None, flags=0, **kwargs): + """Check whether this Tag matches the given CSS selector. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.match() + method. + + :param: a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.match() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.match() method. + + :return: True if this Tag matches the selector; False otherwise. + :rtype: bool + """ + return self.api.match( + select, self.tag, self._ns(namespaces, select), flags, **kwargs + ) + + def filter(self, select, namespaces=None, flags=0, **kwargs): + """Filter this Tag's direct children based on the given CSS selector. + + This uses the Soup Sieve library. It works the same way as + passing this Tag into that library's soupsieve.filter() + method. More information, for more information see the + documentation for soupsieve.filter(). + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.filter() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.filter() method. + + :return: A ResultSet of Tag objects. + :rtype: bs4.element.ResultSet + + """ + return self._rs( + self.api.filter( + select, self.tag, self._ns(namespaces, select), flags, **kwargs + ) + ) diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py index 3bf583f5..e079772e 100644 --- a/lib/bs4/diagnose.py +++ b/lib/bs4/diagnose.py @@ -59,21 +59,6 @@ def diagnose(data): if hasattr(data, 'read'): data = data.read() - elif data.startswith("http:") or data.startswith("https:"): - print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) - print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") - return - else: - try: - if os.path.exists(data): - print(('"%s" looks like a filename. Reading data from the file.' % data)) - with open(data) as fp: - data = fp.read() - except ValueError: - # This can happen on some platforms when the 'filename' is - # too long. Assume it's data and not a filename. - pass - print("") for parser in basic_parsers: print(("Trying to parse your markup with %s" % parser)) diff --git a/lib/bs4/element.py b/lib/bs4/element.py index 583d0e8a..9c73957c 100644 --- a/lib/bs4/element.py +++ b/lib/bs4/element.py @@ -8,14 +8,8 @@ except ImportError as e: import re import sys import warnings -try: - import soupsieve -except ImportError as e: - soupsieve = None - warnings.warn( - 'The soupsieve package is not installed. CSS selectors cannot be used.' - ) +from bs4.css import CSS from bs4.formatter import ( Formatter, HTMLFormatter, @@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([ "string-escape", "string_escape", ]) - + class NamespacedAttribute(str): """A namespaced string (e.g. 'xml:lang') that remembers the namespace ('xml') and the name ('lang') that were used to create it. """ - + def __new__(cls, prefix, name=None, namespace=None): if not name: # This is the default namespace. Its name "has no value" @@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) - + class PageElement(object): """Contains the navigational information for some part of the page: that is, its current location in the parse tree. NavigableString, Tag, etc. are all subclasses of PageElement. """ - + + # In general, we can't tell just by looking at an element whether + # it's contained in an XML document or an HTML document. But for + # Tags (q.v.) we can store this information at parse time. + known_xml = None + def setup(self, parent=None, previous_element=None, next_element=None, previous_sibling=None, next_sibling=None): """Sets up the initial relations between this element and @@ -163,7 +162,7 @@ class PageElement(object): :param previous_element: The element parsed immediately before this one. - + :param next_element: The element parsed immediately before this one. @@ -257,11 +256,11 @@ class PageElement(object): default = object() def _all_strings(self, strip=False, types=default): """Yield all strings of certain classes, possibly stripping them. - + This is implemented differently in Tag and NavigableString. """ raise NotImplementedError() - + @property def stripped_strings(self): """Yield all strings in this PageElement, stripping them first. @@ -294,11 +293,11 @@ class PageElement(object): strip, types=types)]) getText = get_text text = property(get_text) - + def replace_with(self, *args): - """Replace this PageElement with one or more PageElements, keeping the + """Replace this PageElement with one or more PageElements, keeping the rest of the tree the same. - + :param args: One or more PageElements. :return: `self`, no longer part of the tree. """ @@ -410,7 +409,7 @@ class PageElement(object): This works the same way as `list.insert`. :param position: The numeric position that should be occupied - in `self.children` by the new PageElement. + in `self.children` by the new PageElement. :param new_child: A PageElement. """ if new_child is None: @@ -546,7 +545,7 @@ class PageElement(object): "Element has no parent, so 'after' has no meaning.") if any(x is self for x in args): raise ValueError("Can't insert an element after itself.") - + offset = 0 for successor in args: # Extract first so that the index won't be screwed up if they @@ -912,7 +911,7 @@ class PageElement(object): :rtype: bool """ return getattr(self, '_decomposed', False) or False - + # Old non-property versions of the generators, for backwards # compatibility with BS3. def nextGenerator(self): @@ -936,16 +935,11 @@ class NavigableString(str, PageElement): When Beautiful Soup parses the markup penguin, it will create a NavigableString for the string "penguin". - """ + """ PREFIX = '' SUFFIX = '' - # We can't tell just by looking at a string whether it's contained - # in an XML document or an HTML document. - - known_xml = None - def __new__(cls, value): """Create a new NavigableString. @@ -961,12 +955,22 @@ class NavigableString(str, PageElement): u.setup() return u - def __copy__(self): + def __deepcopy__(self, memo, recursive=False): """A copy of a NavigableString has the same contents and class as the original, but it is not connected to the parse tree. + + :param recursive: This parameter is ignored; it's only defined + so that NavigableString.__deepcopy__ implements the same + signature as Tag.__deepcopy__. """ return type(self)(self) + def __copy__(self): + """A copy of a NavigableString can only be a deep copy, because + only one PageElement can occupy a given place in a parse tree. + """ + return self.__deepcopy__({}) + def __getnewargs__(self): return (str(self),) @@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString): as comments (the Comment class) and CDATA blocks (the CData class). """ - + PREFIX = '' SUFFIX = '' - + def output_ready(self, formatter=None): """Make this string ready for output by adding any subclass-specific prefix or suffix. @@ -1144,7 +1148,7 @@ class Stylesheet(NavigableString): """ pass - + class Script(NavigableString): """A NavigableString representing an executable script (probably Javascript). @@ -1250,7 +1254,7 @@ class Tag(PageElement): if ((not builder or builder.store_line_numbers) and (sourceline is not None or sourcepos is not None)): self.sourceline = sourceline - self.sourcepos = sourcepos + self.sourcepos = sourcepos if attrs is None: attrs = {} elif attrs: @@ -1308,13 +1312,49 @@ class Tag(PageElement): self.interesting_string_types = builder.string_containers[self.name] else: self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES - + parserClass = _alias("parser_class") # BS3 - def __copy__(self): - """A copy of a Tag is a new Tag, unconnected to the parse tree. + def __deepcopy__(self, memo, recursive=True): + """A deepcopy of a Tag is a new Tag, unconnected to the parse tree. Its contents are a copy of the old Tag's contents. """ + clone = self._clone() + + if recursive: + # Clone this tag's descendants recursively, but without + # making any recursive function calls. + tag_stack = [clone] + for event, element in self._event_stream(self.descendants): + if event is Tag.END_ELEMENT_EVENT: + # Stop appending incoming Tags to the Tag that was + # just closed. + tag_stack.pop() + else: + descendant_clone = element.__deepcopy__( + memo, recursive=False + ) + # Add to its parent's .contents + tag_stack[-1].append(descendant_clone) + + if event is Tag.START_ELEMENT_EVENT: + # Add the Tag itself to the stack so that its + # children will be .appended to it. + tag_stack.append(descendant_clone) + return clone + + def __copy__(self): + """A copy of a Tag must always be a deep copy, because a Tag's + children can only have one parent at a time. + """ + return self.__deepcopy__({}) + + def _clone(self): + """Create a new Tag just like this one, but with no + contents and unattached to any parse tree. + + This is the first step in the deepcopy process. + """ clone = type(self)( None, self.builder, self.name, self.namespace, self.prefix, self.attrs, is_xml=self._is_xml, @@ -1326,8 +1366,6 @@ class Tag(PageElement): ) for attr in ('can_be_empty_element', 'hidden'): setattr(clone, attr, getattr(self, attr)) - for child in self.contents: - clone.append(child.__copy__()) return clone @property @@ -1433,7 +1471,7 @@ class Tag(PageElement): i.contents = [] i._decomposed = True i = n - + def clear(self, decompose=False): """Wipe out all children of this PageElement by calling extract() on them. @@ -1521,7 +1559,7 @@ class Tag(PageElement): if not isinstance(value, list): value = [value] return value - + def has_attr(self, key): """Does this PageElement have an attribute with the given name?""" return key in self.attrs @@ -1608,7 +1646,7 @@ class Tag(PageElement): def __repr__(self, encoding="unicode-escape"): """Renders this PageElement as a string. - :param encoding: The encoding to use (Python 2 only). + :param encoding: The encoding to use (Python 2 only). TODO: This is now ignored and a warning should be issued if a value is provided. :return: A (Unicode) string. @@ -1650,106 +1688,212 @@ class Tag(PageElement): def decode(self, indent_level=None, eventual_encoding=DEFAULT_OUTPUT_ENCODING, - formatter="minimal"): - """Render a Unicode representation of this PageElement and its - contents. - - :param indent_level: Each line of the rendering will be - indented this many spaces. Used internally in - recursive calls while pretty-printing. - :param eventual_encoding: The tag is destined to be - encoded into this encoding. This method is _not_ - responsible for performing that encoding. This information - is passed in so that it can be substituted in if the - document contains a tag that mentions the document's - encoding. - :param formatter: A Formatter object, or a string naming one of - the standard formatters. - """ - + formatter="minimal", + iterator=None): + pieces = [] # First off, turn a non-Formatter `formatter` into a Formatter # object. This will stop the lookup from happening over and # over again. if not isinstance(formatter, Formatter): formatter = self.formatter_for_name(formatter) - attributes = formatter.attributes(self) - attrs = [] - for key, val in attributes: - if val is None: - decoded = key + + if indent_level is True: + indent_level = 0 + + # The currently active tag that put us into string literal + # mode. Until this element is closed, children will be treated + # as string literals and not pretty-printed. String literal + # mode is turned on immediately after this tag begins, and + # turned off immediately before it's closed. This means there + # will be whitespace before and after the tag itself. + string_literal_tag = None + + for event, element in self._event_stream(iterator): + if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT): + piece = element._format_tag( + eventual_encoding, formatter, opening=True + ) + elif event is Tag.END_ELEMENT_EVENT: + piece = element._format_tag( + eventual_encoding, formatter, opening=False + ) + if indent_level is not None: + indent_level -= 1 else: - if isinstance(val, list) or isinstance(val, tuple): - val = ' '.join(val) - elif not isinstance(val, str): - val = str(val) - elif ( - isinstance(val, AttributeValueWithCharsetSubstitution) - and eventual_encoding is not None - ): - val = val.encode(eventual_encoding) + piece = element.output_ready(formatter) - text = formatter.attribute_value(val) - decoded = ( - str(key) + '=' - + formatter.quoted_attribute_value(text)) - attrs.append(decoded) - close = '' - closeTag = '' + # Now we need to apply the 'prettiness' -- extra + # whitespace before and/or after this tag. This can get + # complicated because certain tags, like
and + # for you +""" + + expect = """
some
+ for you
+
+Some text
-Some more text
-Another
-Bob -English
-English UK
-English US
-French
-