Update soupsieve-2.2.1

2025-08-20 21:33:18 -07:00 · 2021-10-14 20:45:43 -07:00 · 2021-10-14 20:45:43 -07:00 · b581460b51
commit b581460b51
parent 9a54fb9a44
6 changed files with 146 additions and 233 deletions
--- a/lib/soupsieve/init.py
+++ b/lib/soupsieve/init.py
@ -25,17 +25,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from __future__ import unicode_literals
 from .__meta__ import __version__, __version_info__  # noqa: F401
 from . import css_parser as cp
 from . import css_match as cm
 from . import css_types as ct
-from .util import DEBUG, deprecated, SelectorSyntaxError  # noqa: F401
+from .util import DEBUG, SelectorSyntaxError  # noqa: F401
 __all__ = (
    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
-    'closest', 'comments', 'compile', 'filter', 'icomments',
+    'closest', 'compile', 'filter', 'iselect',
-    'iselect', 'match', 'select', 'select_one'
+    'match', 'select', 'select_one'
 )
 SoupSieve = cm.SoupSieve
@ -45,11 +44,11 @@ def compile(pattern, namespaces=None, flags=0, **kwargs):  # noqa: A001
    """Compile CSS pattern."""
    if namespaces is not None:
-        namespaces = ct.Namespaces(**namespaces)
+        namespaces = ct.Namespaces(namespaces)
    custom = kwargs.get('custom')
    if custom is not None:
-        custom = ct.CustomSelectors(**custom)
+        custom = ct.CustomSelectors(custom)
    if isinstance(pattern, SoupSieve):
        if flags:
@ -87,21 +86,6 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs):  # noqa: A001
    return compile(select, namespaces, flags, **kwargs).filter(iterable)
@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
 def comments(tag, limit=0, flags=0, **kwargs):
    """Get comments only."""
    return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
 def icomments(tag, limit=0, flags=0, **kwargs):
    """Iterate comments only."""
    for comment in cm.CommentsMatch(tag).get_comments(limit):
        yield comment
 def select_one(select, tag, namespaces=None, flags=0, **kwargs):
    """Select a single tag."""
--- a/lib/soupsieve/meta.py
+++ b/lib/soupsieve/meta.py
@ -1,5 +1,4 @@
 """Meta related things."""
 from __future__ import unicode_literals
 from collections import namedtuple
 import re
@ -154,11 +153,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
        return ver
-def parse_version(ver, pre=False):
+def parse_version(ver):
    """Parse version into a comparable Version tuple."""
    m = RE_VER.match(ver)
    if m is None:
        raise ValueError("'{}' is not a valid version".format(ver))
    # Handle major, minor, micro
    major = int(m.group('major'))
    minor = int(m.group('minor')) if m.group('minor') else 0
@ -186,5 +188,5 @@ def parse_version(ver, pre=False):
    return Version(major, minor, micro, release, pre, post, dev)
-__version_info__ = Version(1, 9, 5, "final")
+__version_info__ = Version(2, 2, 1, "final")
 __version__ = __version_info__._get_canonical()
--- a/lib/soupsieve/css_match.py
+++ b/lib/soupsieve/css_match.py
@ -1,10 +1,12 @@
 """CSS matcher."""
 from __future__ import unicode_literals
 from datetime import datetime
 from . import util
 import re
 from .import css_types as ct
 import unicodedata
 from collections.abc import Sequence
 import bs4
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -88,57 +90,36 @@ class _DocumentNav(object):
    @staticmethod
    def is_doc(obj):
        """Is `BeautifulSoup` object."""
        import bs4
        return isinstance(obj, bs4.BeautifulSoup)
    @staticmethod
    def is_tag(obj):
        """Is tag."""
        import bs4
        return isinstance(obj, bs4.Tag)
    @staticmethod
    def is_comment(obj):
        """Is comment."""
        import bs4
        return isinstance(obj, bs4.Comment)
    @staticmethod
    def is_declaration(obj):  # pragma: no cover
        """Is declaration."""
        import bs4
        return isinstance(obj, bs4.Declaration)
    @staticmethod
    def is_cdata(obj):
        """Is CDATA."""
        import bs4
        return isinstance(obj, bs4.CData)
    @staticmethod
    def is_processing_instruction(obj):  # pragma: no cover
        """Is processing instruction."""
        import bs4
        return isinstance(obj, bs4.ProcessingInstruction)
    @staticmethod
    def is_navigable_string(obj):
        """Is navigable string."""
        import bs4
        return isinstance(obj, bs4.NavigableString)
    @staticmethod
    def is_special_string(obj):
        """Is special string."""
        import bs4
        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
    @classmethod
@ -296,36 +277,68 @@ class _DocumentNav(object):
        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
-    @staticmethod
+    @classmethod
-    def get_attribute_by_name(el, name, default=None):
+    def normalize_value(cls, value):
        """Normalize the value to be a string or list of strings."""
        # Treat `None` as empty string.
        if value is None:
            return ''
        # Pass through strings
        if (isinstance(value, str)):
            return value
        # If it's a byte string, convert it to Unicode, treating it as UTF-8.
        if isinstance(value, bytes):
            return value.decode("utf8")
        # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
        if isinstance(value, Sequence):
            new_value = []
            for v in value:
                if isinstance(v, Sequence):
                    # This is most certainly a user error and will crash and burn later,
                    # but to avoid excessive recursion, kick out now.
                    new_value.append(v)
                else:
                    # Convert the child to a string
                    new_value.append(cls.normalize_value(v))
            return new_value
        # Try and make anything else a string
        return str(value)
    @classmethod
    def get_attribute_by_name(cls, el, name, default=None):
        """Get attribute by name."""
        value = default
        if el._is_xml:
            try:
-                value = el.attrs[name]
+                value = cls.normalize_value(el.attrs[name])
            except KeyError:
                pass
        else:
            for k, v in el.attrs.items():
                if util.lower(k) == name:
-                    value = v
+                    value = cls.normalize_value(v)
                    break
        return value
-    @staticmethod
+    @classmethod
-    def iter_attributes(el):
+    def iter_attributes(cls, el):
        """Iterate attributes."""
        for k, v in el.attrs.items():
-            yield k, v
+            yield k, cls.normalize_value(v)
    @classmethod
    def get_classes(cls, el):
        """Get classes."""
        classes = cls.get_attribute_by_name(el, 'class', [])
-        if isinstance(classes, util.ustr):
+        if isinstance(classes, str):
            classes = RE_NOT_WS.findall(classes)
        return classes
@ -336,6 +349,11 @@ class _DocumentNav(object):
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )
    def get_own_text(self, el, no_iframe=False):
        """Get Own Text."""
        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
 class Inputs(object):
    """Class for parsing and validating input items."""
@ -963,12 +981,23 @@ class _Match(object):
        content = None
        for contain_list in contains:
            if content is None:
-                content = self.get_text(el, no_iframe=self.is_html)
+                if contain_list.own:
                    content = self.get_own_text(el, no_iframe=self.is_html)
                else:
                    content = self.get_text(el, no_iframe=self.is_html)
            found = False
            for text in contain_list.text:
-                if text in content:
+                if contain_list.own:
-                    found = True
+                    for c in content:
-                    break
+                        if text in c:
                            found = True
                            break
                    if found:
                        break
                else:
                    if text in content:
                        found = True
                        break
            if not found:
                match = False
        return match
@ -1429,30 +1458,6 @@ class CSSMatch(_DocumentNav, _Match):
    """The Beautiful Soup CSS match class."""
 class CommentsMatch(_DocumentNav):
    """Comments matcher."""
    def __init__(self, el):
        """Initialize."""
        self.assert_valid_input(el)
        self.tag = el
    def get_comments(self, limit=0):
        """Get comments."""
        if limit < 1:
            limit = None
        for child in self.get_descendants(self.tag, tags=False):
            if self.is_comment(child):
                yield child
                if limit is not None:
                    limit -= 1
                    if limit < 1:
                        break
 class SoupSieve(ct.Immutable):
    """Compiled Soup Sieve selector matching object."""
@ -1496,19 +1501,6 @@ class SoupSieve(ct.Immutable):
        else:
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
    @util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
    def comments(self, tag, limit=0):
        """Get comments only."""
        return [comment for comment in CommentsMatch(tag).get_comments(limit)]
    @util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
    def icomments(self, tag, limit=0):
        """Iterate comments only."""
        for comment in CommentsMatch(tag).get_comments(limit):
            yield comment
    def select_one(self, tag):
        """Select a single tag."""
--- a/lib/soupsieve/css_parser.py
+++ b/lib/soupsieve/css_parser.py
@ -1,10 +1,11 @@
 """CSS selector parser."""
 from __future__ import unicode_literals
 import re
 from functools import lru_cache
 from . import util
 from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
 import warnings
 UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -59,6 +60,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
 # Complex pseudo classes that take selector lists
 PSEUDO_COMPLEX = {
    ':contains',
    ':-soup-contains',
    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
@ -117,9 +120,11 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
 # Classes (`.class`)
 PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
 # Prefix:Tag (`prefix|tag`)
-PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
+PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
 # Attributes (`[attr]`, `[attr=value]`, etc.)
-PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
+PAT_ATTR = r'''
 \[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
 '''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
 # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
 PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
 # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
@ -196,7 +201,7 @@ FLG_PLACEHOLDER_SHOWN = 0x200
 _MAXCACHE = 500
-@util.lru_cache(maxsize=_MAXCACHE)
+@lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(pattern, namespaces, custom, flags):
    """Cached CSS compile."""
@ -245,7 +250,7 @@ def css_unescape(content, string=False):
            codepoint = int(m.group(1)[1:], 16)
            if codepoint == 0:
                codepoint = UNICODE_REPLACEMENT_CHAR
-            value = util.uchr(codepoint)
+            value = chr(codepoint)
        elif m.group(2):
            value = m.group(2)[1:]
        elif m.group(3):
@ -269,7 +274,7 @@ def escape(ident):
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
-            codepoint = util.uord(c)
+            codepoint = ord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
@ -300,12 +305,7 @@ class SelectorPattern(object):
        return self.name
-    def enabled(self, flags):
+    def match(self, selector, index, flags):
        """Enabled."""
        return True
    def match(self, selector, index):
        """Match the selector."""
        return self.re_pattern.match(selector, index)
@ -320,7 +320,7 @@ class SpecialPseudoPattern(SelectorPattern):
        self.patterns = {}
        for p in patterns:
            name = p[0]
-            pattern = SelectorPattern(name, p[2])
+            pattern = p[3](name, p[2])
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern
@ -332,12 +332,7 @@ class SpecialPseudoPattern(SelectorPattern):
        return self.matched_name.get_name()
-    def enabled(self, flags):
+    def match(self, selector, index, flags):
        """Enabled."""
        return True
    def match(self, selector, index):
        """Match the selector."""
        pseudo = None
@ -346,7 +341,7 @@ class SpecialPseudoPattern(SelectorPattern):
            name = util.lower(css_unescape(m.group('name')))
            pattern = self.patterns.get(name)
            if pattern:
-                pseudo = pattern.match(selector, index)
+                pseudo = pattern.match(selector, index, flags)
                if pseudo:
                    self.matched_name = pattern
@ -429,11 +424,16 @@ class CSSParser(object):
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
-                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
+                (
-                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
+                    "pseudo_contains",
-                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
+                    (':contains', ':-soup-contains', ':-soup-contains-own'),
-                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
+                    PAT_PSEUDO_CONTAINS,
-                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
+                    SelectorPattern
                ),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
@ -461,15 +461,11 @@ class CSSParser(object):
        inverse = False
        op = m.group('cmp')
        case = util.lower(m.group('case')) if m.group('case') else None
-        parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
+        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
-        ns = ''
+        attr = css_unescape(m.group('attr_name'))
        is_type = False
        pattern2 = None
-        if len(parts) > 1:
+
            ns = parts[0]
            attr = parts[1]
        else:
            attr = parts[0]
        if case:
            flags = re.I if case == 'i' else 0
        elif util.lower(attr) == 'type':
@ -532,13 +528,8 @@ class CSSParser(object):
    def parse_tag_pattern(self, sel, m, has_selector):
        """Parse tag pattern from regex match."""
-        parts = [css_unescape(x) for x in m.group(0).split('|')]
+        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
-        if len(parts) > 1:
+        tag = css_unescape(m.group('tag_name'))
            prefix = parts[0]
            tag = parts[1]
        else:
            tag = parts[0]
            prefix = None
        sel.tag = ct.SelectorTag(tag, prefix)
        has_selector = True
        return has_selector
@ -817,7 +808,14 @@ class CSSParser(object):
    def parse_pseudo_contains(self, sel, m, has_selector):
        """Parse contains."""
-        values = m.group('values')
+        pseudo = util.lower(css_unescape(m.group('name')))
        if pseudo == ":contains":
            warnings.warn(
                "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
                FutureWarning
            )
        contains_own = pseudo == ":-soup-contains-own"
        values = css_unescape(m.group('values'))
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
@ -828,7 +826,7 @@ class CSSParser(object):
            else:
                value = css_unescape(value)
            patterns.append(value)
-        sel.contains.append(ct.SelectorContains(tuple(patterns)))
+        sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
        has_selector = True
        return has_selector
@ -918,7 +916,7 @@ class CSSParser(object):
                elif key == 'pseudo_class':
                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
                elif key == 'pseudo_element':
-                    raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
+                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
                elif key == 'pseudo_contains':
                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1027,9 +1025,7 @@ class CSSParser(object):
        while index <= end:
            m = None
            for v in self.css_tokens:
-                if not v.enabled(self.flags):  # pragma: no cover
+                m = v.match(pattern, index, self.flags)
                    continue
                m = v.match(pattern, index)
                if m:
                    name = v.get_name()
                    if self.debug:  # pragma: no cover
@ -1067,7 +1063,7 @@ class CSSParser(object):
 # CSS pattern for `:link` and `:any-link`
 CSS_LINK = CSSParser(
-    'html|*:is(a, area, link)[href]'
+    'html|*:is(a, area)[href]'
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:checked`
 CSS_CHECKED = CSSParser(
@ -1098,23 +1094,23 @@ CSS_INDETERMINATE = CSSParser(
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
-    html|input[type="radio"][name][name!='']:not([checked])
+    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
 # CSS pattern for `:disabled`
 CSS_DISABLED = CSSParser(
    '''
-    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
-    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
+    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
-        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
+        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:enabled`
 CSS_ENABLED = CSSParser(
    '''
-    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:required`
@ -1138,8 +1134,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
        [type=email],
        [type=password],
        [type=number]
-    )[placeholder][placeholder!='']:is(:not([value]), [value=""]),
+    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
-    html|textarea[placeholder][placeholder!='']
+    html|textarea[placeholder]:not([placeholder=''])
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
 # CSS pattern default for `:nth-child` "of S" feature
--- a/lib/soupsieve/css_types.py
+++ b/lib/soupsieve/css_types.py
@ -1,6 +1,6 @@
 """CSS selector structure items."""
-from __future__ import unicode_literals
+import copyreg
-from . import util
+from collections.abc import Hashable, Mapping
 __all__ = (
    'Selector',
@ -86,21 +86,21 @@ class Immutable(object):
    __str__ = __repr__
-class ImmutableDict(util.Mapping):
+class ImmutableDict(Mapping):
    """Hashable, immutable dictionary."""
-    def __init__(self, *args, **kwargs):
+    def __init__(self, arg):
        """Initialize."""
-        arg = args[0] if args else kwargs
+        arg
        is_dict = isinstance(arg, dict)
        if (
-            is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
+            is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
-            not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
+            not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
        ):
            raise TypeError('All values must be hashable')
-        self._d = dict(*args, **kwargs)
+        self._d = dict(arg)
        self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
    def __iter__(self):
@ -133,39 +133,37 @@ class ImmutableDict(util.Mapping):
 class Namespaces(ImmutableDict):
    """Namespaces."""
-    def __init__(self, *args, **kwargs):
+    def __init__(self, arg):
        """Initialize."""
        # If there are arguments, check the first index.
        # `super` should fail if the user gave multiple arguments,
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
            raise TypeError('Namespace keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
            raise TypeError('Namespace keys and values must be Unicode strings')
-        super(Namespaces, self).__init__(*args, **kwargs)
+        super(Namespaces, self).__init__(arg)
 class CustomSelectors(ImmutableDict):
    """Custom selectors."""
-    def __init__(self, *args, **kwargs):
+    def __init__(self, arg):
        """Initialize."""
        # If there are arguments, check the first index.
        # `super` should fail if the user gave multiple arguments,
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
-        super(CustomSelectors, self).__init__(*args, **kwargs)
+        super(CustomSelectors, self).__init__(arg)
 class Selector(Immutable):
@ -239,13 +237,14 @@ class SelectorAttribute(Immutable):
 class SelectorContains(Immutable):
    """Selector contains rule."""
-    __slots__ = ("text", "_hash")
+    __slots__ = ("text", "own", "_hash")
-    def __init__(self, text):
+    def __init__(self, text, own):
        """Initialize."""
        super(SelectorContains, self).__init__(
-            text=text
+            text=text,
            own=own
        )
@ -332,7 +331,7 @@ def _pickle(p):
 def pickle_register(obj):
    """Allow object to be pickled."""
-    util.copyreg.pickle(obj, _pickle)
+    copyreg.pickle(obj, _pickle)
 pickle_register(Selector)
--- a/lib/soupsieve/util.py
+++ b/lib/soupsieve/util.py
@ -1,46 +1,17 @@
 """Utility."""
-from __future__ import unicode_literals
+from functools import wraps, lru_cache
 from functools import wraps
 import warnings
 import sys
 import struct
 import os
 import re
 MODULE = os.path.dirname(__file__)
 PY3 = sys.version_info >= (3, 0)
 PY35 = sys.version_info >= (3, 5)
 PY37 = sys.version_info >= (3, 7)
 if PY3:
    from functools import lru_cache  # noqa F401
    import copyreg  # noqa F401
    from collections.abc import Hashable, Mapping  # noqa F401
    ustr = str
    bstr = bytes
    unichar = chr
    string = str
 else:
    from backports.functools_lru_cache import lru_cache  # noqa F401
    import copy_reg as copyreg  # noqa F401
    from collections import Hashable, Mapping  # noqa F401
    ustr = unicode  # noqa: F821
    bstr = str
    unichar = unichr  # noqa: F821
    string = basestring  # noqa: F821
 DEBUG = 0x00001
 RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
 LC_A = ord('a')
 LC_Z = ord('z')
 UC_A = ord('A')
 UC_Z = ord('Z')
@lru_cache(maxsize=512)
 def lower(string):
    """Lower."""
@ -51,38 +22,7 @@ def lower(string):
    return ''.join(new_string)
-def upper(string):  # pragma: no cover
+class SelectorSyntaxError(Exception):
    """Lower."""
    new_string = []
    for c in string:
        o = ord(c)
        new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
    return ''.join(new_string)
 def uchr(i):
    """Allow getting Unicode character on narrow python builds."""
    try:
        return unichar(i)
    except ValueError:  # pragma: no cover
        return struct.pack('i', i).decode('utf-32')
 def uord(c):
    """Get Unicode ordinal."""
    if len(c) == 2:  # pragma: no cover
        high, low = [ord(p) for p in c]
        ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
    else:
        ordinal = ord(c)
    return ordinal
 class SelectorSyntaxError(SyntaxError):
    """Syntax error in a CSS selector."""
    def __init__(self, msg, pattern=None, index=None):