diff --git a/lib/soupsieve/__init__.py b/lib/soupsieve/__init__.py index ebd3a4a4..fefc6ca0 100644 --- a/lib/soupsieve/__init__.py +++ b/lib/soupsieve/__init__.py @@ -25,17 +25,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from __future__ import unicode_literals from .__meta__ import __version__, __version_info__ # noqa: F401 from . import css_parser as cp from . import css_match as cm from . import css_types as ct -from .util import DEBUG, deprecated, SelectorSyntaxError # noqa: F401 +from .util import DEBUG, SelectorSyntaxError # noqa: F401 __all__ = ( 'DEBUG', 'SelectorSyntaxError', 'SoupSieve', - 'closest', 'comments', 'compile', 'filter', 'icomments', - 'iselect', 'match', 'select', 'select_one' + 'closest', 'compile', 'filter', 'iselect', + 'match', 'select', 'select_one' ) SoupSieve = cm.SoupSieve @@ -45,11 +44,11 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001 """Compile CSS pattern.""" if namespaces is not None: - namespaces = ct.Namespaces(**namespaces) + namespaces = ct.Namespaces(namespaces) custom = kwargs.get('custom') if custom is not None: - custom = ct.CustomSelectors(**custom) + custom = ct.CustomSelectors(custom) if isinstance(pattern, SoupSieve): if flags: @@ -87,21 +86,6 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001 return compile(select, namespaces, flags, **kwargs).filter(iterable) -@deprecated("'comments' is not related to CSS selectors and will be removed in the future.") -def comments(tag, limit=0, flags=0, **kwargs): - """Get comments only.""" - - return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)] - - -@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.") -def icomments(tag, limit=0, flags=0, **kwargs): - """Iterate comments only.""" - - for comment in 
cm.CommentsMatch(tag).get_comments(limit): - yield comment - - def select_one(select, tag, namespaces=None, flags=0, **kwargs): """Select a single tag.""" diff --git a/lib/soupsieve/__meta__.py b/lib/soupsieve/__meta__.py index 109e4733..eb145789 100644 --- a/lib/soupsieve/__meta__.py +++ b/lib/soupsieve/__meta__.py @@ -1,5 +1,4 @@ """Meta related things.""" -from __future__ import unicode_literals from collections import namedtuple import re @@ -154,11 +153,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" return ver -def parse_version(ver, pre=False): +def parse_version(ver): """Parse version into a comparable Version tuple.""" m = RE_VER.match(ver) + if m is None: + raise ValueError("'{}' is not a valid version".format(ver)) + # Handle major, minor, micro major = int(m.group('major')) minor = int(m.group('minor')) if m.group('minor') else 0 @@ -186,5 +188,5 @@ def parse_version(ver, pre=False): return Version(major, minor, micro, release, pre, post, dev) -__version_info__ = Version(1, 9, 5, "final") +__version_info__ = Version(2, 2, 1, "final") __version__ = __version_info__._get_canonical() diff --git a/lib/soupsieve/css_match.py b/lib/soupsieve/css_match.py index 9ff2e88f..a9eeaad2 100644 --- a/lib/soupsieve/css_match.py +++ b/lib/soupsieve/css_match.py @@ -1,10 +1,12 @@ """CSS matcher.""" -from __future__ import unicode_literals from datetime import datetime from . 
import util import re from .import css_types as ct import unicodedata +from collections.abc import Sequence + +import bs4 # Empty tag pattern (whitespace okay) RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') @@ -88,57 +90,36 @@ class _DocumentNav(object): @staticmethod def is_doc(obj): """Is `BeautifulSoup` object.""" - - import bs4 return isinstance(obj, bs4.BeautifulSoup) @staticmethod def is_tag(obj): """Is tag.""" - - import bs4 return isinstance(obj, bs4.Tag) - @staticmethod - def is_comment(obj): - """Is comment.""" - - import bs4 - return isinstance(obj, bs4.Comment) - @staticmethod def is_declaration(obj): # pragma: no cover """Is declaration.""" - - import bs4 return isinstance(obj, bs4.Declaration) @staticmethod def is_cdata(obj): """Is CDATA.""" - - import bs4 return isinstance(obj, bs4.CData) @staticmethod def is_processing_instruction(obj): # pragma: no cover """Is processing instruction.""" - - import bs4 return isinstance(obj, bs4.ProcessingInstruction) @staticmethod def is_navigable_string(obj): """Is navigable string.""" - - import bs4 return isinstance(obj, bs4.NavigableString) @staticmethod def is_special_string(obj): """Is special string.""" - - import bs4 return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype)) @classmethod @@ -296,36 +277,68 @@ class _DocumentNav(object): return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None) - @staticmethod - def get_attribute_by_name(el, name, default=None): + @classmethod + def normalize_value(cls, value): + """Normalize the value to be a string or list of strings.""" + + # Treat `None` as empty string. + if value is None: + return '' + + # Pass through strings + if (isinstance(value, str)): + return value + + # If it's a byte string, convert it to Unicode, treating it as UTF-8. + if isinstance(value, bytes): + return value.decode("utf8") + + # BeautifulSoup supports sequences of attribute values, so make sure the children are strings. 
+ if isinstance(value, Sequence): + new_value = [] + for v in value: + if isinstance(v, Sequence): + # This is most certainly a user error and will crash and burn later, + # but to avoid excessive recursion, kick out now. + new_value.append(v) + else: + # Convert the child to a string + new_value.append(cls.normalize_value(v)) + return new_value + + # Try and make anything else a string + return str(value) + + @classmethod + def get_attribute_by_name(cls, el, name, default=None): """Get attribute by name.""" value = default if el._is_xml: try: - value = el.attrs[name] + value = cls.normalize_value(el.attrs[name]) except KeyError: pass else: for k, v in el.attrs.items(): if util.lower(k) == name: - value = v + value = cls.normalize_value(v) break return value - @staticmethod - def iter_attributes(el): + @classmethod + def iter_attributes(cls, el): """Iterate attributes.""" for k, v in el.attrs.items(): - yield k, v + yield k, cls.normalize_value(v) @classmethod def get_classes(cls, el): """Get classes.""" classes = cls.get_attribute_by_name(el, 'class', []) - if isinstance(classes, util.ustr): + if isinstance(classes, str): classes = RE_NOT_WS.findall(classes) return classes @@ -336,6 +349,11 @@ class _DocumentNav(object): [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)] ) + def get_own_text(self, el, no_iframe=False): + """Get Own Text.""" + + return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)] + class Inputs(object): """Class for parsing and validating input items.""" @@ -963,12 +981,23 @@ class _Match(object): content = None for contain_list in contains: if content is None: - content = self.get_text(el, no_iframe=self.is_html) + if contain_list.own: + content = self.get_own_text(el, no_iframe=self.is_html) + else: + content = self.get_text(el, no_iframe=self.is_html) found = False for text in contain_list.text: - if text in content: - found = True - break 
+ if contain_list.own: + for c in content: + if text in c: + found = True + break + if found: + break + else: + if text in content: + found = True + break if not found: match = False return match @@ -1429,30 +1458,6 @@ class CSSMatch(_DocumentNav, _Match): """The Beautiful Soup CSS match class.""" -class CommentsMatch(_DocumentNav): - """Comments matcher.""" - - def __init__(self, el): - """Initialize.""" - - self.assert_valid_input(el) - self.tag = el - - def get_comments(self, limit=0): - """Get comments.""" - - if limit < 1: - limit = None - - for child in self.get_descendants(self.tag, tags=False): - if self.is_comment(child): - yield child - if limit is not None: - limit -= 1 - if limit < 1: - break - - class SoupSieve(ct.Immutable): """Compiled Soup Sieve selector matching object.""" @@ -1496,19 +1501,6 @@ class SoupSieve(ct.Immutable): else: return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)] - @util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.") - def comments(self, tag, limit=0): - """Get comments only.""" - - return [comment for comment in CommentsMatch(tag).get_comments(limit)] - - @util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.") - def icomments(self, tag, limit=0): - """Iterate comments only.""" - - for comment in CommentsMatch(tag).get_comments(limit): - yield comment - def select_one(self, tag): """Select a single tag.""" diff --git a/lib/soupsieve/css_parser.py b/lib/soupsieve/css_parser.py index e7c8833b..462aa947 100644 --- a/lib/soupsieve/css_parser.py +++ b/lib/soupsieve/css_parser.py @@ -1,10 +1,11 @@ """CSS selector parser.""" -from __future__ import unicode_literals import re +from functools import lru_cache from . import util from . import css_match as cm from . 
import css_types as ct from .util import SelectorSyntaxError +import warnings UNICODE_REPLACEMENT_CHAR = 0xFFFD @@ -59,6 +60,8 @@ PSEUDO_SIMPLE_NO_MATCH = { # Complex pseudo classes that take selector lists PSEUDO_COMPLEX = { ':contains', + ':-soup-contains', + ':-soup-contains-own', ':has', ':is', ':matches', @@ -117,9 +120,11 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) # Classes (`.class`) PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) # Prefix:Tag (`prefix|tag`) -PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER) +PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER) # Attributes (`[attr]`, `[attr=value]`, etc.) -PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR) +PAT_ATTR = r''' +\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr} +'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR) # Pseudo class (`:pseudo-class`, `:pseudo-class(`) PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. 
@@ -196,7 +201,7 @@ FLG_PLACEHOLDER_SHOWN = 0x200 _MAXCACHE = 500 -@util.lru_cache(maxsize=_MAXCACHE) +@lru_cache(maxsize=_MAXCACHE) def _cached_css_compile(pattern, namespaces, custom, flags): """Cached CSS compile.""" @@ -245,7 +250,7 @@ def css_unescape(content, string=False): codepoint = int(m.group(1)[1:], 16) if codepoint == 0: codepoint = UNICODE_REPLACEMENT_CHAR - value = util.uchr(codepoint) + value = chr(codepoint) elif m.group(2): value = m.group(2)[1:] elif m.group(3): @@ -269,7 +274,7 @@ def escape(ident): string.append('\\{}'.format(ident)) else: for index, c in enumerate(ident): - codepoint = util.uord(c) + codepoint = ord(c) if codepoint == 0x00: string.append('\ufffd') elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: @@ -300,12 +305,7 @@ class SelectorPattern(object): return self.name - def enabled(self, flags): - """Enabled.""" - - return True - - def match(self, selector, index): + def match(self, selector, index, flags): """Match the selector.""" return self.re_pattern.match(selector, index) @@ -320,7 +320,7 @@ class SpecialPseudoPattern(SelectorPattern): self.patterns = {} for p in patterns: name = p[0] - pattern = SelectorPattern(name, p[2]) + pattern = p[3](name, p[2]) for pseudo in p[1]: self.patterns[pseudo] = pattern @@ -332,12 +332,7 @@ class SpecialPseudoPattern(SelectorPattern): return self.matched_name.get_name() - def enabled(self, flags): - """Enabled.""" - - return True - - def match(self, selector, index): + def match(self, selector, index, flags): """Match the selector.""" pseudo = None @@ -346,7 +341,7 @@ class SpecialPseudoPattern(SelectorPattern): name = util.lower(css_unescape(m.group('name'))) pattern = self.patterns.get(name) if pattern: - pseudo = pattern.match(selector, index) + pseudo = pattern.match(selector, index, flags) if pseudo: self.matched_name = pattern @@ -429,11 +424,16 @@ class CSSParser(object): SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE), SpecialPseudoPattern( ( - ("pseudo_contains", 
(':contains',), PAT_PSEUDO_CONTAINS), - ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD), - ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE), - ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG), - ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR) + ( + "pseudo_contains", + (':contains', ':-soup-contains', ':-soup-contains-own'), + PAT_PSEUDO_CONTAINS, + SelectorPattern + ), + ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern), + ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern), + ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern), + ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern) ) ), SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), @@ -461,15 +461,11 @@ class CSSParser(object): inverse = False op = m.group('cmp') case = util.lower(m.group('case')) if m.group('case') else None - parts = [css_unescape(a) for a in m.group('ns_attr').split('|')] - ns = '' + ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else '' + attr = css_unescape(m.group('attr_name')) is_type = False pattern2 = None - if len(parts) > 1: - ns = parts[0] - attr = parts[1] - else: - attr = parts[0] + if case: flags = re.I if case == 'i' else 0 elif util.lower(attr) == 'type': @@ -532,13 +528,8 @@ class CSSParser(object): def parse_tag_pattern(self, sel, m, has_selector): """Parse tag pattern from regex match.""" - parts = [css_unescape(x) for x in m.group(0).split('|')] - if len(parts) > 1: - prefix = parts[0] - tag = parts[1] - else: - tag = parts[0] - prefix = None + prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None + tag = css_unescape(m.group('tag_name')) sel.tag = ct.SelectorTag(tag, prefix) has_selector = True return has_selector @@ -817,7 +808,14 @@ class CSSParser(object): def parse_pseudo_contains(self, sel, m, has_selector): """Parse contains.""" - values = 
m.group('values') + pseudo = util.lower(css_unescape(m.group('name'))) + if pseudo == ":contains": + warnings.warn( + "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", + FutureWarning + ) + contains_own = pseudo == ":-soup-contains-own" + values = css_unescape(m.group('values')) patterns = [] for token in RE_VALUES.finditer(values): if token.group('split'): @@ -828,7 +826,7 @@ class CSSParser(object): else: value = css_unescape(value) patterns.append(value) - sel.contains.append(ct.SelectorContains(tuple(patterns))) + sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own)) has_selector = True return has_selector @@ -918,7 +916,7 @@ class CSSParser(object): elif key == 'pseudo_class': has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) elif key == 'pseudo_element': - raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0))) + raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) elif key == 'pseudo_contains': has_selector = self.parse_pseudo_contains(sel, m, has_selector) elif key in ('pseudo_nth_type', 'pseudo_nth_child'): @@ -1027,9 +1025,7 @@ class CSSParser(object): while index <= end: m = None for v in self.css_tokens: - if not v.enabled(self.flags): # pragma: no cover - continue - m = v.match(pattern, index) + m = v.match(pattern, index, self.flags) if m: name = v.get_name() if self.debug: # pragma: no cover @@ -1067,7 +1063,7 @@ class CSSParser(object): # CSS pattern for `:link` and `:any-link` CSS_LINK = CSSParser( - 'html|*:is(a, area, link)[href]' + 'html|*:is(a, area)[href]' ).process_selectors(flags=FLG_PSEUDO | FLG_HTML) # CSS pattern for `:checked` CSS_CHECKED = CSSParser( @@ -1098,23 +1094,23 @@ CSS_INDETERMINATE = CSSParser( This pattern must be at the end. Special logic is applied to the last selector. 
*/ - html|input[type="radio"][name][name!='']:not([checked]) + html|input[type="radio"][name]:not([name='']):not([checked]) ''' ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE) # CSS pattern for `:disabled` CSS_DISABLED = CSSParser( ''' - html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled], + html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled], html|optgroup[disabled] > html|option, - html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset), + html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset), html|fieldset[disabled] > - html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset) + html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset) ''' ).process_selectors(flags=FLG_PSEUDO | FLG_HTML) # CSS pattern for `:enabled` CSS_ENABLED = CSSParser( ''' - html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled) + html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled) ''' ).process_selectors(flags=FLG_PSEUDO | FLG_HTML) # CSS pattern for `:required` @@ -1138,8 +1134,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser( [type=email], [type=password], [type=number] - )[placeholder][placeholder!='']:is(:not([value]), [value=""]), - html|textarea[placeholder][placeholder!=''] + )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]), + html|textarea[placeholder]:not([placeholder='']) ''' ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN) # CSS pattern default for `:nth-child` "of S" feature diff --git a/lib/soupsieve/css_types.py b/lib/soupsieve/css_types.py index e4baef37..c2b9f30d 100644 --- a/lib/soupsieve/css_types.py +++ 
b/lib/soupsieve/css_types.py @@ -1,6 +1,6 @@ """CSS selector structure items.""" -from __future__ import unicode_literals -from . import util +import copyreg +from collections.abc import Hashable, Mapping __all__ = ( 'Selector', @@ -86,21 +86,21 @@ class Immutable(object): __str__ = __repr__ -class ImmutableDict(util.Mapping): +class ImmutableDict(Mapping): """Hashable, immutable dictionary.""" - def __init__(self, *args, **kwargs): + def __init__(self, arg): """Initialize.""" - arg = args[0] if args else kwargs + arg is_dict = isinstance(arg, dict) if ( - is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or - not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg]) + is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or + not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]) ): raise TypeError('All values must be hashable') - self._d = dict(*args, **kwargs) + self._d = dict(arg) self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())])) def __iter__(self): @@ -133,39 +133,37 @@ class ImmutableDict(util.Mapping): class Namespaces(ImmutableDict): """Namespaces.""" - def __init__(self, *args, **kwargs): + def __init__(self, arg): """Initialize.""" # If there are arguments, check the first index. # `super` should fail if the user gave multiple arguments, # so don't bother checking that. 
- arg = args[0] if args else kwargs is_dict = isinstance(arg, dict) - if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]): + if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]): raise TypeError('Namespace keys and values must be Unicode strings') - elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]): + elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): raise TypeError('Namespace keys and values must be Unicode strings') - super(Namespaces, self).__init__(*args, **kwargs) + super(Namespaces, self).__init__(arg) class CustomSelectors(ImmutableDict): """Custom selectors.""" - def __init__(self, *args, **kwargs): + def __init__(self, arg): """Initialize.""" # If there are arguments, check the first index. # `super` should fail if the user gave multiple arguments, # so don't bother checking that. - arg = args[0] if args else kwargs is_dict = isinstance(arg, dict) - if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]): + if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]): raise TypeError('CustomSelectors keys and values must be Unicode strings') - elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]): + elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): raise TypeError('CustomSelectors keys and values must be Unicode strings') - super(CustomSelectors, self).__init__(*args, **kwargs) + super(CustomSelectors, self).__init__(arg) class Selector(Immutable): @@ -239,13 +237,14 @@ class SelectorAttribute(Immutable): class SelectorContains(Immutable): """Selector contains rule.""" - __slots__ = ("text", "_hash") + __slots__ = ("text", "own", "_hash") - def __init__(self, text): + def __init__(self, text, 
own): """Initialize.""" super(SelectorContains, self).__init__( - text=text + text=text, + own=own ) @@ -332,7 +331,7 @@ def _pickle(p): def pickle_register(obj): """Allow object to be pickled.""" - util.copyreg.pickle(obj, _pickle) + copyreg.pickle(obj, _pickle) pickle_register(Selector) diff --git a/lib/soupsieve/util.py b/lib/soupsieve/util.py index 6158367a..7f5d9f89 100644 --- a/lib/soupsieve/util.py +++ b/lib/soupsieve/util.py @@ -1,46 +1,17 @@ """Utility.""" -from __future__ import unicode_literals -from functools import wraps +from functools import wraps, lru_cache import warnings -import sys -import struct -import os import re -MODULE = os.path.dirname(__file__) - -PY3 = sys.version_info >= (3, 0) -PY35 = sys.version_info >= (3, 5) -PY37 = sys.version_info >= (3, 7) - -if PY3: - from functools import lru_cache # noqa F401 - import copyreg # noqa F401 - from collections.abc import Hashable, Mapping # noqa F401 - - ustr = str - bstr = bytes - unichar = chr - string = str -else: - from backports.functools_lru_cache import lru_cache # noqa F401 - import copy_reg as copyreg # noqa F401 - from collections import Hashable, Mapping # noqa F401 - - ustr = unicode # noqa: F821 - bstr = str - unichar = unichr # noqa: F821 - string = basestring # noqa: F821 DEBUG = 0x00001 RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$') -LC_A = ord('a') -LC_Z = ord('z') UC_A = ord('A') UC_Z = ord('Z') +@lru_cache(maxsize=512) def lower(string): """Lower.""" @@ -51,38 +22,7 @@ def lower(string): return ''.join(new_string) -def upper(string): # pragma: no cover - """Lower.""" - - new_string = [] - for c in string: - o = ord(c) - new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c) - return ''.join(new_string) - - -def uchr(i): - """Allow getting Unicode character on narrow python builds.""" - - try: - return unichar(i) - except ValueError: # pragma: no cover - return struct.pack('i', i).decode('utf-32') - - -def uord(c): - """Get Unicode ordinal.""" - - if len(c) 
== 2: # pragma: no cover - high, low = [ord(p) for p in c] - ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 - else: - ordinal = ord(c) - - return ordinal - - -class SelectorSyntaxError(SyntaxError): +class SelectorSyntaxError(Exception): """Syntax error in a CSS selector.""" def __init__(self, msg, pattern=None, index=None):