Update soupsieve-2.2.1

This commit is contained in:
JonnyWong16 2021-10-14 20:45:43 -07:00
parent 9a54fb9a44
commit b581460b51
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
6 changed files with 146 additions and 233 deletions

View file

@ -25,17 +25,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. SOFTWARE.
""" """
from __future__ import unicode_literals
from .__meta__ import __version__, __version_info__ # noqa: F401 from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp from . import css_parser as cp
from . import css_match as cm from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import DEBUG, deprecated, SelectorSyntaxError # noqa: F401 from .util import DEBUG, SelectorSyntaxError # noqa: F401
__all__ = ( __all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve', 'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
'closest', 'comments', 'compile', 'filter', 'icomments', 'closest', 'compile', 'filter', 'iselect',
'iselect', 'match', 'select', 'select_one' 'match', 'select', 'select_one'
) )
SoupSieve = cm.SoupSieve SoupSieve = cm.SoupSieve
@ -45,11 +44,11 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
"""Compile CSS pattern.""" """Compile CSS pattern."""
if namespaces is not None: if namespaces is not None:
namespaces = ct.Namespaces(**namespaces) namespaces = ct.Namespaces(namespaces)
custom = kwargs.get('custom') custom = kwargs.get('custom')
if custom is not None: if custom is not None:
custom = ct.CustomSelectors(**custom) custom = ct.CustomSelectors(custom)
if isinstance(pattern, SoupSieve): if isinstance(pattern, SoupSieve):
if flags: if flags:
@ -87,21 +86,6 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
return compile(select, namespaces, flags, **kwargs).filter(iterable) return compile(select, namespaces, flags, **kwargs).filter(iterable)
@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
def comments(tag, limit=0, flags=0, **kwargs):
"""Get comments only."""
return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
def icomments(tag, limit=0, flags=0, **kwargs):
"""Iterate comments only."""
for comment in cm.CommentsMatch(tag).get_comments(limit):
yield comment
def select_one(select, tag, namespaces=None, flags=0, **kwargs): def select_one(select, tag, namespaces=None, flags=0, **kwargs):
"""Select a single tag.""" """Select a single tag."""

View file

@ -1,5 +1,4 @@
"""Meta related things.""" """Meta related things."""
from __future__ import unicode_literals
from collections import namedtuple from collections import namedtuple
import re import re
@ -154,11 +153,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
return ver return ver
def parse_version(ver, pre=False): def parse_version(ver):
"""Parse version into a comparable Version tuple.""" """Parse version into a comparable Version tuple."""
m = RE_VER.match(ver) m = RE_VER.match(ver)
if m is None:
raise ValueError("'{}' is not a valid version".format(ver))
# Handle major, minor, micro # Handle major, minor, micro
major = int(m.group('major')) major = int(m.group('major'))
minor = int(m.group('minor')) if m.group('minor') else 0 minor = int(m.group('minor')) if m.group('minor') else 0
@ -186,5 +188,5 @@ def parse_version(ver, pre=False):
return Version(major, minor, micro, release, pre, post, dev) return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(1, 9, 5, "final") __version_info__ = Version(2, 2, 1, "final")
__version__ = __version_info__._get_canonical() __version__ = __version_info__._get_canonical()

View file

@ -1,10 +1,12 @@
"""CSS matcher.""" """CSS matcher."""
from __future__ import unicode_literals
from datetime import datetime from datetime import datetime
from . import util from . import util
import re import re
from .import css_types as ct from .import css_types as ct
import unicodedata import unicodedata
from collections.abc import Sequence
import bs4
# Empty tag pattern (whitespace okay) # Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -88,57 +90,36 @@ class _DocumentNav(object):
@staticmethod @staticmethod
def is_doc(obj): def is_doc(obj):
"""Is `BeautifulSoup` object.""" """Is `BeautifulSoup` object."""
import bs4
return isinstance(obj, bs4.BeautifulSoup) return isinstance(obj, bs4.BeautifulSoup)
@staticmethod @staticmethod
def is_tag(obj): def is_tag(obj):
"""Is tag.""" """Is tag."""
import bs4
return isinstance(obj, bs4.Tag) return isinstance(obj, bs4.Tag)
@staticmethod
def is_comment(obj):
"""Is comment."""
import bs4
return isinstance(obj, bs4.Comment)
@staticmethod @staticmethod
def is_declaration(obj): # pragma: no cover def is_declaration(obj): # pragma: no cover
"""Is declaration.""" """Is declaration."""
import bs4
return isinstance(obj, bs4.Declaration) return isinstance(obj, bs4.Declaration)
@staticmethod @staticmethod
def is_cdata(obj): def is_cdata(obj):
"""Is CDATA.""" """Is CDATA."""
import bs4
return isinstance(obj, bs4.CData) return isinstance(obj, bs4.CData)
@staticmethod @staticmethod
def is_processing_instruction(obj): # pragma: no cover def is_processing_instruction(obj): # pragma: no cover
"""Is processing instruction.""" """Is processing instruction."""
import bs4
return isinstance(obj, bs4.ProcessingInstruction) return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod @staticmethod
def is_navigable_string(obj): def is_navigable_string(obj):
"""Is navigable string.""" """Is navigable string."""
import bs4
return isinstance(obj, bs4.NavigableString) return isinstance(obj, bs4.NavigableString)
@staticmethod @staticmethod
def is_special_string(obj): def is_special_string(obj):
"""Is special string.""" """Is special string."""
import bs4
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype)) return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod @classmethod
@ -296,36 +277,68 @@ class _DocumentNav(object):
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None) return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@staticmethod @classmethod
def get_attribute_by_name(el, name, default=None): def normalize_value(cls, value):
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.
if value is None:
return ''
# Pass through strings
if (isinstance(value, str)):
return value
# If it's a byte string, convert it to Unicode, treating it as UTF-8.
if isinstance(value, bytes):
return value.decode("utf8")
# BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
if isinstance(value, Sequence):
new_value = []
for v in value:
if isinstance(v, Sequence):
# This is most certainly a user error and will crash and burn later,
# but to avoid excessive recursion, kick out now.
new_value.append(v)
else:
# Convert the child to a string
new_value.append(cls.normalize_value(v))
return new_value
# Try and make anything else a string
return str(value)
@classmethod
def get_attribute_by_name(cls, el, name, default=None):
"""Get attribute by name.""" """Get attribute by name."""
value = default value = default
if el._is_xml: if el._is_xml:
try: try:
value = el.attrs[name] value = cls.normalize_value(el.attrs[name])
except KeyError: except KeyError:
pass pass
else: else:
for k, v in el.attrs.items(): for k, v in el.attrs.items():
if util.lower(k) == name: if util.lower(k) == name:
value = v value = cls.normalize_value(v)
break break
return value return value
@staticmethod @classmethod
def iter_attributes(el): def iter_attributes(cls, el):
"""Iterate attributes.""" """Iterate attributes."""
for k, v in el.attrs.items(): for k, v in el.attrs.items():
yield k, v yield k, cls.normalize_value(v)
@classmethod @classmethod
def get_classes(cls, el): def get_classes(cls, el):
"""Get classes.""" """Get classes."""
classes = cls.get_attribute_by_name(el, 'class', []) classes = cls.get_attribute_by_name(el, 'class', [])
if isinstance(classes, util.ustr): if isinstance(classes, str):
classes = RE_NOT_WS.findall(classes) classes = RE_NOT_WS.findall(classes)
return classes return classes
@ -336,6 +349,11 @@ class _DocumentNav(object):
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)] [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
) )
def get_own_text(self, el, no_iframe=False):
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
class Inputs(object): class Inputs(object):
"""Class for parsing and validating input items.""" """Class for parsing and validating input items."""
@ -963,12 +981,23 @@ class _Match(object):
content = None content = None
for contain_list in contains: for contain_list in contains:
if content is None: if content is None:
content = self.get_text(el, no_iframe=self.is_html) if contain_list.own:
content = self.get_own_text(el, no_iframe=self.is_html)
else:
content = self.get_text(el, no_iframe=self.is_html)
found = False found = False
for text in contain_list.text: for text in contain_list.text:
if text in content: if contain_list.own:
found = True for c in content:
break if text in c:
found = True
break
if found:
break
else:
if text in content:
found = True
break
if not found: if not found:
match = False match = False
return match return match
@ -1429,30 +1458,6 @@ class CSSMatch(_DocumentNav, _Match):
"""The Beautiful Soup CSS match class.""" """The Beautiful Soup CSS match class."""
class CommentsMatch(_DocumentNav):
"""Comments matcher."""
def __init__(self, el):
"""Initialize."""
self.assert_valid_input(el)
self.tag = el
def get_comments(self, limit=0):
"""Get comments."""
if limit < 1:
limit = None
for child in self.get_descendants(self.tag, tags=False):
if self.is_comment(child):
yield child
if limit is not None:
limit -= 1
if limit < 1:
break
class SoupSieve(ct.Immutable): class SoupSieve(ct.Immutable):
"""Compiled Soup Sieve selector matching object.""" """Compiled Soup Sieve selector matching object."""
@ -1496,19 +1501,6 @@ class SoupSieve(ct.Immutable):
else: else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)] return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
@util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
def comments(self, tag, limit=0):
"""Get comments only."""
return [comment for comment in CommentsMatch(tag).get_comments(limit)]
@util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
def icomments(self, tag, limit=0):
"""Iterate comments only."""
for comment in CommentsMatch(tag).get_comments(limit):
yield comment
def select_one(self, tag): def select_one(self, tag):
"""Select a single tag.""" """Select a single tag."""

View file

@ -1,10 +1,11 @@
"""CSS selector parser.""" """CSS selector parser."""
from __future__ import unicode_literals
import re import re
from functools import lru_cache
from . import util from . import util
from . import css_match as cm from . import css_match as cm
from . import css_types as ct from . import css_types as ct
from .util import SelectorSyntaxError from .util import SelectorSyntaxError
import warnings
UNICODE_REPLACEMENT_CHAR = 0xFFFD UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -59,6 +60,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
# Complex pseudo classes that take selector lists # Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = { PSEUDO_COMPLEX = {
':contains', ':contains',
':-soup-contains',
':-soup-contains-own',
':has', ':has',
':is', ':is',
':matches', ':matches',
@ -117,9 +120,11 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`) # Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`) # Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER) PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.) # Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR) PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`) # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
@ -196,7 +201,7 @@ FLG_PLACEHOLDER_SHOWN = 0x200
_MAXCACHE = 500 _MAXCACHE = 500
@util.lru_cache(maxsize=_MAXCACHE) @lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags): def _cached_css_compile(pattern, namespaces, custom, flags):
"""Cached CSS compile.""" """Cached CSS compile."""
@ -245,7 +250,7 @@ def css_unescape(content, string=False):
codepoint = int(m.group(1)[1:], 16) codepoint = int(m.group(1)[1:], 16)
if codepoint == 0: if codepoint == 0:
codepoint = UNICODE_REPLACEMENT_CHAR codepoint = UNICODE_REPLACEMENT_CHAR
value = util.uchr(codepoint) value = chr(codepoint)
elif m.group(2): elif m.group(2):
value = m.group(2)[1:] value = m.group(2)[1:]
elif m.group(3): elif m.group(3):
@ -269,7 +274,7 @@ def escape(ident):
string.append('\\{}'.format(ident)) string.append('\\{}'.format(ident))
else: else:
for index, c in enumerate(ident): for index, c in enumerate(ident):
codepoint = util.uord(c) codepoint = ord(c)
if codepoint == 0x00: if codepoint == 0x00:
string.append('\ufffd') string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
@ -300,12 +305,7 @@ class SelectorPattern(object):
return self.name return self.name
def enabled(self, flags): def match(self, selector, index, flags):
"""Enabled."""
return True
def match(self, selector, index):
"""Match the selector.""" """Match the selector."""
return self.re_pattern.match(selector, index) return self.re_pattern.match(selector, index)
@ -320,7 +320,7 @@ class SpecialPseudoPattern(SelectorPattern):
self.patterns = {} self.patterns = {}
for p in patterns: for p in patterns:
name = p[0] name = p[0]
pattern = SelectorPattern(name, p[2]) pattern = p[3](name, p[2])
for pseudo in p[1]: for pseudo in p[1]:
self.patterns[pseudo] = pattern self.patterns[pseudo] = pattern
@ -332,12 +332,7 @@ class SpecialPseudoPattern(SelectorPattern):
return self.matched_name.get_name() return self.matched_name.get_name()
def enabled(self, flags): def match(self, selector, index, flags):
"""Enabled."""
return True
def match(self, selector, index):
"""Match the selector.""" """Match the selector."""
pseudo = None pseudo = None
@ -346,7 +341,7 @@ class SpecialPseudoPattern(SelectorPattern):
name = util.lower(css_unescape(m.group('name'))) name = util.lower(css_unescape(m.group('name')))
pattern = self.patterns.get(name) pattern = self.patterns.get(name)
if pattern: if pattern:
pseudo = pattern.match(selector, index) pseudo = pattern.match(selector, index, flags)
if pseudo: if pseudo:
self.matched_name = pattern self.matched_name = pattern
@ -429,11 +424,16 @@ class CSSParser(object):
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE), SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
SpecialPseudoPattern( SpecialPseudoPattern(
( (
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS), (
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD), "pseudo_contains",
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE), (':contains', ':-soup-contains', ':-soup-contains-own'),
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG), PAT_PSEUDO_CONTAINS,
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR) SelectorPattern
),
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
) )
), ),
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
@ -461,15 +461,11 @@ class CSSParser(object):
inverse = False inverse = False
op = m.group('cmp') op = m.group('cmp')
case = util.lower(m.group('case')) if m.group('case') else None case = util.lower(m.group('case')) if m.group('case') else None
parts = [css_unescape(a) for a in m.group('ns_attr').split('|')] ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
ns = '' attr = css_unescape(m.group('attr_name'))
is_type = False is_type = False
pattern2 = None pattern2 = None
if len(parts) > 1:
ns = parts[0]
attr = parts[1]
else:
attr = parts[0]
if case: if case:
flags = re.I if case == 'i' else 0 flags = re.I if case == 'i' else 0
elif util.lower(attr) == 'type': elif util.lower(attr) == 'type':
@ -532,13 +528,8 @@ class CSSParser(object):
def parse_tag_pattern(self, sel, m, has_selector): def parse_tag_pattern(self, sel, m, has_selector):
"""Parse tag pattern from regex match.""" """Parse tag pattern from regex match."""
parts = [css_unescape(x) for x in m.group(0).split('|')] prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
if len(parts) > 1: tag = css_unescape(m.group('tag_name'))
prefix = parts[0]
tag = parts[1]
else:
tag = parts[0]
prefix = None
sel.tag = ct.SelectorTag(tag, prefix) sel.tag = ct.SelectorTag(tag, prefix)
has_selector = True has_selector = True
return has_selector return has_selector
@ -817,7 +808,14 @@ class CSSParser(object):
def parse_pseudo_contains(self, sel, m, has_selector): def parse_pseudo_contains(self, sel, m, has_selector):
"""Parse contains.""" """Parse contains."""
values = m.group('values') pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn(
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
contains_own = pseudo == ":-soup-contains-own"
values = css_unescape(m.group('values'))
patterns = [] patterns = []
for token in RE_VALUES.finditer(values): for token in RE_VALUES.finditer(values):
if token.group('split'): if token.group('split'):
@ -828,7 +826,7 @@ class CSSParser(object):
else: else:
value = css_unescape(value) value = css_unescape(value)
patterns.append(value) patterns.append(value)
sel.contains.append(ct.SelectorContains(tuple(patterns))) sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
has_selector = True has_selector = True
return has_selector return has_selector
@ -918,7 +916,7 @@ class CSSParser(object):
elif key == 'pseudo_class': elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element': elif key == 'pseudo_element':
raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0))) raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
elif key == 'pseudo_contains': elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector) has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'): elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1027,9 +1025,7 @@ class CSSParser(object):
while index <= end: while index <= end:
m = None m = None
for v in self.css_tokens: for v in self.css_tokens:
if not v.enabled(self.flags): # pragma: no cover m = v.match(pattern, index, self.flags)
continue
m = v.match(pattern, index)
if m: if m:
name = v.get_name() name = v.get_name()
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
@ -1067,7 +1063,7 @@ class CSSParser(object):
# CSS pattern for `:link` and `:any-link` # CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser( CSS_LINK = CSSParser(
'html|*:is(a, area, link)[href]' 'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML) ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked` # CSS pattern for `:checked`
CSS_CHECKED = CSSParser( CSS_CHECKED = CSSParser(
@ -1098,23 +1094,23 @@ CSS_INDETERMINATE = CSSParser(
This pattern must be at the end. This pattern must be at the end.
Special logic is applied to the last selector. Special logic is applied to the last selector.
*/ */
html|input[type="radio"][name][name!='']:not([checked]) html|input[type="radio"][name]:not([name='']):not([checked])
''' '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE) ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled` # CSS pattern for `:disabled`
CSS_DISABLED = CSSParser( CSS_DISABLED = CSSParser(
''' '''
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled], html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
html|optgroup[disabled] > html|option, html|optgroup[disabled] > html|option,
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset), html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
html|fieldset[disabled] > html|fieldset[disabled] >
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset) html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
''' '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML) ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled` # CSS pattern for `:enabled`
CSS_ENABLED = CSSParser( CSS_ENABLED = CSSParser(
''' '''
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
''' '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML) ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required` # CSS pattern for `:required`
@ -1138,8 +1134,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
[type=email], [type=email],
[type=password], [type=password],
[type=number] [type=number]
)[placeholder][placeholder!='']:is(:not([value]), [value=""]), )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
html|textarea[placeholder][placeholder!=''] html|textarea[placeholder]:not([placeholder=''])
''' '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN) ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature # CSS pattern default for `:nth-child` "of S" feature

View file

@ -1,6 +1,6 @@
"""CSS selector structure items.""" """CSS selector structure items."""
from __future__ import unicode_literals import copyreg
from . import util from collections.abc import Hashable, Mapping
__all__ = ( __all__ = (
'Selector', 'Selector',
@ -86,21 +86,21 @@ class Immutable(object):
__str__ = __repr__ __str__ = __repr__
class ImmutableDict(util.Mapping): class ImmutableDict(Mapping):
"""Hashable, immutable dictionary.""" """Hashable, immutable dictionary."""
def __init__(self, *args, **kwargs): def __init__(self, arg):
"""Initialize.""" """Initialize."""
arg = args[0] if args else kwargs arg
is_dict = isinstance(arg, dict) is_dict = isinstance(arg, dict)
if ( if (
is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg]) not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
): ):
raise TypeError('All values must be hashable') raise TypeError('All values must be hashable')
self._d = dict(*args, **kwargs) self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())])) self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def __iter__(self): def __iter__(self):
@ -133,39 +133,37 @@ class ImmutableDict(util.Mapping):
class Namespaces(ImmutableDict): class Namespaces(ImmutableDict):
"""Namespaces.""" """Namespaces."""
def __init__(self, *args, **kwargs): def __init__(self, arg):
"""Initialize.""" """Initialize."""
# If there are arguments, check the first index. # If there are arguments, check the first index.
# `super` should fail if the user gave multiple arguments, # `super` should fail if the user gave multiple arguments,
# so don't bother checking that. # so don't bother checking that.
arg = args[0] if args else kwargs
is_dict = isinstance(arg, dict) is_dict = isinstance(arg, dict)
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]): if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
raise TypeError('Namespace keys and values must be Unicode strings') raise TypeError('Namespace keys and values must be Unicode strings')
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]): elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('Namespace keys and values must be Unicode strings') raise TypeError('Namespace keys and values must be Unicode strings')
super(Namespaces, self).__init__(*args, **kwargs) super(Namespaces, self).__init__(arg)
class CustomSelectors(ImmutableDict): class CustomSelectors(ImmutableDict):
"""Custom selectors.""" """Custom selectors."""
def __init__(self, *args, **kwargs): def __init__(self, arg):
"""Initialize.""" """Initialize."""
# If there are arguments, check the first index. # If there are arguments, check the first index.
# `super` should fail if the user gave multiple arguments, # `super` should fail if the user gave multiple arguments,
# so don't bother checking that. # so don't bother checking that.
arg = args[0] if args else kwargs
is_dict = isinstance(arg, dict) is_dict = isinstance(arg, dict)
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]): if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
raise TypeError('CustomSelectors keys and values must be Unicode strings') raise TypeError('CustomSelectors keys and values must be Unicode strings')
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]): elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('CustomSelectors keys and values must be Unicode strings') raise TypeError('CustomSelectors keys and values must be Unicode strings')
super(CustomSelectors, self).__init__(*args, **kwargs) super(CustomSelectors, self).__init__(arg)
class Selector(Immutable): class Selector(Immutable):
@ -239,13 +237,14 @@ class SelectorAttribute(Immutable):
class SelectorContains(Immutable): class SelectorContains(Immutable):
"""Selector contains rule.""" """Selector contains rule."""
__slots__ = ("text", "_hash") __slots__ = ("text", "own", "_hash")
def __init__(self, text): def __init__(self, text, own):
"""Initialize.""" """Initialize."""
super(SelectorContains, self).__init__( super(SelectorContains, self).__init__(
text=text text=text,
own=own
) )
@ -332,7 +331,7 @@ def _pickle(p):
def pickle_register(obj): def pickle_register(obj):
"""Allow object to be pickled.""" """Allow object to be pickled."""
util.copyreg.pickle(obj, _pickle) copyreg.pickle(obj, _pickle)
pickle_register(Selector) pickle_register(Selector)

View file

@ -1,46 +1,17 @@
"""Utility.""" """Utility."""
from __future__ import unicode_literals from functools import wraps, lru_cache
from functools import wraps
import warnings import warnings
import sys
import struct
import os
import re import re
MODULE = os.path.dirname(__file__)
PY3 = sys.version_info >= (3, 0)
PY35 = sys.version_info >= (3, 5)
PY37 = sys.version_info >= (3, 7)
if PY3:
from functools import lru_cache # noqa F401
import copyreg # noqa F401
from collections.abc import Hashable, Mapping # noqa F401
ustr = str
bstr = bytes
unichar = chr
string = str
else:
from backports.functools_lru_cache import lru_cache # noqa F401
import copy_reg as copyreg # noqa F401
from collections import Hashable, Mapping # noqa F401
ustr = unicode # noqa: F821
bstr = str
unichar = unichr # noqa: F821
string = basestring # noqa: F821
DEBUG = 0x00001 DEBUG = 0x00001
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$') RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
LC_A = ord('a')
LC_Z = ord('z')
UC_A = ord('A') UC_A = ord('A')
UC_Z = ord('Z') UC_Z = ord('Z')
@lru_cache(maxsize=512)
def lower(string): def lower(string):
"""Lower.""" """Lower."""
@ -51,38 +22,7 @@ def lower(string):
return ''.join(new_string) return ''.join(new_string)
def upper(string): # pragma: no cover class SelectorSyntaxError(Exception):
"""Lower."""
new_string = []
for c in string:
o = ord(c)
new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
return ''.join(new_string)
def uchr(i):
"""Allow getting Unicode character on narrow python builds."""
try:
return unichar(i)
except ValueError: # pragma: no cover
return struct.pack('i', i).decode('utf-32')
def uord(c):
"""Get Unicode ordinal."""
if len(c) == 2: # pragma: no cover
high, low = [ord(p) for p in c]
ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
else:
ordinal = ord(c)
return ordinal
class SelectorSyntaxError(SyntaxError):
"""Syntax error in a CSS selector.""" """Syntax error in a CSS selector."""
def __init__(self, msg, pattern=None, index=None): def __init__(self, msg, pattern=None, index=None):