diff --git a/lib/jaraco/text/__init__.py b/lib/jaraco/text/__init__.py index 95e20b40..5f75519a 100644 --- a/lib/jaraco/text/__init__.py +++ b/lib/jaraco/text/__init__.py @@ -4,11 +4,12 @@ import textwrap import functools try: - from importlib import resources # type: ignore + from importlib.resources import files # type: ignore except ImportError: # pragma: nocover - import importlib_resources as resources # type: ignore + from importlib_resources import files # type: ignore from jaraco.functools import compose, method_cache +from jaraco.context import ExceptionTrap def substitution(old, new): @@ -109,7 +110,7 @@ class FoldedCase(str): return hash(self.lower()) def __contains__(self, other): - return super(FoldedCase, self).lower().__contains__(other.lower()) + return super().lower().__contains__(other.lower()) def in_(self, other): "Does self appear in other?" @@ -118,7 +119,7 @@ class FoldedCase(str): # cache lower since it's likely to be called frequently. @method_cache def lower(self): - return super(FoldedCase, self).lower() + return super().lower() def index(self, sub): return self.lower().index(sub.lower()) @@ -128,6 +129,11 @@ class FoldedCase(str): return pattern.split(self, maxsplit) +# Python 3.8 compatibility +_unicode_trap = ExceptionTrap(UnicodeDecodeError) + + +@_unicode_trap.passes def is_decodable(value): r""" Return True if the supplied value is decodable (using the default @@ -138,14 +144,7 @@ def is_decodable(value): >>> is_decodable(b'\x32') True """ - # TODO: This code could be expressed more consisely and directly - # with a jaraco.context.ExceptionTrap, but that adds an unfortunate - # long dependency tree, so for now, use boolean literals. - try: - value.decode() - except UnicodeDecodeError: - return False - return True + value.decode() def is_binary(value): @@ -225,7 +224,7 @@ def unwrap(s): return '\n'.join(cleaned) -lorem_ipsum = resources.read_text(__name__, 'Lorem ipsum.txt') # type: ignore +lorem_ipsum: str = files(__name__).joinpath('Lorem ipsum.txt').read_text() class Splitter(object): @@ -370,10 +369,6 @@ class WordSet(tuple): result = WordSet(result) return result - # for compatibility with Python 2 - def __getslice__(self, i, j): # pragma: nocover - return self.__getitem__(slice(i, j)) - @classmethod def parse(cls, identifier): matches = cls._pattern.finditer(identifier) @@ -527,3 +522,79 @@ def normalize_newlines(text): newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029'] pattern = '|'.join(newlines) return re.sub(pattern, '\n', text) + + +def _nonblank(str): + return str and not str.startswith('#') + + +@functools.singledispatch +def yield_lines(iterable): + r""" + Yield valid lines of a string or iterable. + + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def drop_comment(line): + """ + Drop comments. + + >>> drop_comment('foo # bar') + 'foo' + + A hash without a space may be in a URL. + + >>> drop_comment('http://example.com/foo#bar') + 'http://example.com/foo#bar' + """ + return line.partition(' #')[0] + + +def join_continuation(lines): + r""" + Join lines continued by a trailing backslash. + + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar \\', 'baz'])) + ['foobarbaz'] + + Not sure why, but... + The character preceeding the backslash is also elided. + + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith('\\'): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item