mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-06 13:11:15 -07:00
Update soupsieve-2.2.1
This commit is contained in:
parent
9a54fb9a44
commit
b581460b51
6 changed files with 146 additions and 233 deletions
|
@ -25,17 +25,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
"""
|
"""
|
||||||
from __future__ import unicode_literals
|
|
||||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||||
from . import css_parser as cp
|
from . import css_parser as cp
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import DEBUG, deprecated, SelectorSyntaxError # noqa: F401
|
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
||||||
'closest', 'comments', 'compile', 'filter', 'icomments',
|
'closest', 'compile', 'filter', 'iselect',
|
||||||
'iselect', 'match', 'select', 'select_one'
|
'match', 'select', 'select_one'
|
||||||
)
|
)
|
||||||
|
|
||||||
SoupSieve = cm.SoupSieve
|
SoupSieve = cm.SoupSieve
|
||||||
|
@ -45,11 +44,11 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||||
"""Compile CSS pattern."""
|
"""Compile CSS pattern."""
|
||||||
|
|
||||||
if namespaces is not None:
|
if namespaces is not None:
|
||||||
namespaces = ct.Namespaces(**namespaces)
|
namespaces = ct.Namespaces(namespaces)
|
||||||
|
|
||||||
custom = kwargs.get('custom')
|
custom = kwargs.get('custom')
|
||||||
if custom is not None:
|
if custom is not None:
|
||||||
custom = ct.CustomSelectors(**custom)
|
custom = ct.CustomSelectors(custom)
|
||||||
|
|
||||||
if isinstance(pattern, SoupSieve):
|
if isinstance(pattern, SoupSieve):
|
||||||
if flags:
|
if flags:
|
||||||
|
@ -87,21 +86,6 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||||
|
|
||||||
|
|
||||||
@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
|
|
||||||
def comments(tag, limit=0, flags=0, **kwargs):
|
|
||||||
"""Get comments only."""
|
|
||||||
|
|
||||||
return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
|
|
||||||
|
|
||||||
|
|
||||||
@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
|
|
||||||
def icomments(tag, limit=0, flags=0, **kwargs):
|
|
||||||
"""Iterate comments only."""
|
|
||||||
|
|
||||||
for comment in cm.CommentsMatch(tag).get_comments(limit):
|
|
||||||
yield comment
|
|
||||||
|
|
||||||
|
|
||||||
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
"""Meta related things."""
|
"""Meta related things."""
|
||||||
from __future__ import unicode_literals
|
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -154,11 +153,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
||||||
return ver
|
return ver
|
||||||
|
|
||||||
|
|
||||||
def parse_version(ver, pre=False):
|
def parse_version(ver):
|
||||||
"""Parse version into a comparable Version tuple."""
|
"""Parse version into a comparable Version tuple."""
|
||||||
|
|
||||||
m = RE_VER.match(ver)
|
m = RE_VER.match(ver)
|
||||||
|
|
||||||
|
if m is None:
|
||||||
|
raise ValueError("'{}' is not a valid version".format(ver))
|
||||||
|
|
||||||
# Handle major, minor, micro
|
# Handle major, minor, micro
|
||||||
major = int(m.group('major'))
|
major = int(m.group('major'))
|
||||||
minor = int(m.group('minor')) if m.group('minor') else 0
|
minor = int(m.group('minor')) if m.group('minor') else 0
|
||||||
|
@ -186,5 +188,5 @@ def parse_version(ver, pre=False):
|
||||||
return Version(major, minor, micro, release, pre, post, dev)
|
return Version(major, minor, micro, release, pre, post, dev)
|
||||||
|
|
||||||
|
|
||||||
__version_info__ = Version(1, 9, 5, "final")
|
__version_info__ = Version(2, 2, 1, "final")
|
||||||
__version__ = __version_info__._get_canonical()
|
__version__ = __version_info__._get_canonical()
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
"""CSS matcher."""
|
"""CSS matcher."""
|
||||||
from __future__ import unicode_literals
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from . import util
|
from . import util
|
||||||
import re
|
import re
|
||||||
from .import css_types as ct
|
from .import css_types as ct
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
import bs4
|
||||||
|
|
||||||
# Empty tag pattern (whitespace okay)
|
# Empty tag pattern (whitespace okay)
|
||||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||||
|
@ -88,57 +90,36 @@ class _DocumentNav(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_doc(obj):
|
def is_doc(obj):
|
||||||
"""Is `BeautifulSoup` object."""
|
"""Is `BeautifulSoup` object."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.BeautifulSoup)
|
return isinstance(obj, bs4.BeautifulSoup)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_tag(obj):
|
def is_tag(obj):
|
||||||
"""Is tag."""
|
"""Is tag."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.Tag)
|
return isinstance(obj, bs4.Tag)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def is_comment(obj):
|
|
||||||
"""Is comment."""
|
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.Comment)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_declaration(obj): # pragma: no cover
|
def is_declaration(obj): # pragma: no cover
|
||||||
"""Is declaration."""
|
"""Is declaration."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.Declaration)
|
return isinstance(obj, bs4.Declaration)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_cdata(obj):
|
def is_cdata(obj):
|
||||||
"""Is CDATA."""
|
"""Is CDATA."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.CData)
|
return isinstance(obj, bs4.CData)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_processing_instruction(obj): # pragma: no cover
|
def is_processing_instruction(obj): # pragma: no cover
|
||||||
"""Is processing instruction."""
|
"""Is processing instruction."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.ProcessingInstruction)
|
return isinstance(obj, bs4.ProcessingInstruction)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_navigable_string(obj):
|
def is_navigable_string(obj):
|
||||||
"""Is navigable string."""
|
"""Is navigable string."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, bs4.NavigableString)
|
return isinstance(obj, bs4.NavigableString)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_special_string(obj):
|
def is_special_string(obj):
|
||||||
"""Is special string."""
|
"""Is special string."""
|
||||||
|
|
||||||
import bs4
|
|
||||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -296,36 +277,68 @@ class _DocumentNav(object):
|
||||||
|
|
||||||
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
|
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def get_attribute_by_name(el, name, default=None):
|
def normalize_value(cls, value):
|
||||||
|
"""Normalize the value to be a string or list of strings."""
|
||||||
|
|
||||||
|
# Treat `None` as empty string.
|
||||||
|
if value is None:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
# Pass through strings
|
||||||
|
if (isinstance(value, str)):
|
||||||
|
return value
|
||||||
|
|
||||||
|
# If it's a byte string, convert it to Unicode, treating it as UTF-8.
|
||||||
|
if isinstance(value, bytes):
|
||||||
|
return value.decode("utf8")
|
||||||
|
|
||||||
|
# BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
|
||||||
|
if isinstance(value, Sequence):
|
||||||
|
new_value = []
|
||||||
|
for v in value:
|
||||||
|
if isinstance(v, Sequence):
|
||||||
|
# This is most certainly a user error and will crash and burn later,
|
||||||
|
# but to avoid excessive recursion, kick out now.
|
||||||
|
new_value.append(v)
|
||||||
|
else:
|
||||||
|
# Convert the child to a string
|
||||||
|
new_value.append(cls.normalize_value(v))
|
||||||
|
return new_value
|
||||||
|
|
||||||
|
# Try and make anything else a string
|
||||||
|
return str(value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_attribute_by_name(cls, el, name, default=None):
|
||||||
"""Get attribute by name."""
|
"""Get attribute by name."""
|
||||||
|
|
||||||
value = default
|
value = default
|
||||||
if el._is_xml:
|
if el._is_xml:
|
||||||
try:
|
try:
|
||||||
value = el.attrs[name]
|
value = cls.normalize_value(el.attrs[name])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
for k, v in el.attrs.items():
|
for k, v in el.attrs.items():
|
||||||
if util.lower(k) == name:
|
if util.lower(k) == name:
|
||||||
value = v
|
value = cls.normalize_value(v)
|
||||||
break
|
break
|
||||||
return value
|
return value
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def iter_attributes(el):
|
def iter_attributes(cls, el):
|
||||||
"""Iterate attributes."""
|
"""Iterate attributes."""
|
||||||
|
|
||||||
for k, v in el.attrs.items():
|
for k, v in el.attrs.items():
|
||||||
yield k, v
|
yield k, cls.normalize_value(v)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_classes(cls, el):
|
def get_classes(cls, el):
|
||||||
"""Get classes."""
|
"""Get classes."""
|
||||||
|
|
||||||
classes = cls.get_attribute_by_name(el, 'class', [])
|
classes = cls.get_attribute_by_name(el, 'class', [])
|
||||||
if isinstance(classes, util.ustr):
|
if isinstance(classes, str):
|
||||||
classes = RE_NOT_WS.findall(classes)
|
classes = RE_NOT_WS.findall(classes)
|
||||||
return classes
|
return classes
|
||||||
|
|
||||||
|
@ -336,6 +349,11 @@ class _DocumentNav(object):
|
||||||
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_own_text(self, el, no_iframe=False):
|
||||||
|
"""Get Own Text."""
|
||||||
|
|
||||||
|
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||||
|
|
||||||
|
|
||||||
class Inputs(object):
|
class Inputs(object):
|
||||||
"""Class for parsing and validating input items."""
|
"""Class for parsing and validating input items."""
|
||||||
|
@ -963,12 +981,23 @@ class _Match(object):
|
||||||
content = None
|
content = None
|
||||||
for contain_list in contains:
|
for contain_list in contains:
|
||||||
if content is None:
|
if content is None:
|
||||||
content = self.get_text(el, no_iframe=self.is_html)
|
if contain_list.own:
|
||||||
|
content = self.get_own_text(el, no_iframe=self.is_html)
|
||||||
|
else:
|
||||||
|
content = self.get_text(el, no_iframe=self.is_html)
|
||||||
found = False
|
found = False
|
||||||
for text in contain_list.text:
|
for text in contain_list.text:
|
||||||
if text in content:
|
if contain_list.own:
|
||||||
found = True
|
for c in content:
|
||||||
break
|
if text in c:
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
if found:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if text in content:
|
||||||
|
found = True
|
||||||
|
break
|
||||||
if not found:
|
if not found:
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
@ -1429,30 +1458,6 @@ class CSSMatch(_DocumentNav, _Match):
|
||||||
"""The Beautiful Soup CSS match class."""
|
"""The Beautiful Soup CSS match class."""
|
||||||
|
|
||||||
|
|
||||||
class CommentsMatch(_DocumentNav):
|
|
||||||
"""Comments matcher."""
|
|
||||||
|
|
||||||
def __init__(self, el):
|
|
||||||
"""Initialize."""
|
|
||||||
|
|
||||||
self.assert_valid_input(el)
|
|
||||||
self.tag = el
|
|
||||||
|
|
||||||
def get_comments(self, limit=0):
|
|
||||||
"""Get comments."""
|
|
||||||
|
|
||||||
if limit < 1:
|
|
||||||
limit = None
|
|
||||||
|
|
||||||
for child in self.get_descendants(self.tag, tags=False):
|
|
||||||
if self.is_comment(child):
|
|
||||||
yield child
|
|
||||||
if limit is not None:
|
|
||||||
limit -= 1
|
|
||||||
if limit < 1:
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
class SoupSieve(ct.Immutable):
|
class SoupSieve(ct.Immutable):
|
||||||
"""Compiled Soup Sieve selector matching object."""
|
"""Compiled Soup Sieve selector matching object."""
|
||||||
|
|
||||||
|
@ -1496,19 +1501,6 @@ class SoupSieve(ct.Immutable):
|
||||||
else:
|
else:
|
||||||
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
||||||
|
|
||||||
@util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
|
|
||||||
def comments(self, tag, limit=0):
|
|
||||||
"""Get comments only."""
|
|
||||||
|
|
||||||
return [comment for comment in CommentsMatch(tag).get_comments(limit)]
|
|
||||||
|
|
||||||
@util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
|
|
||||||
def icomments(self, tag, limit=0):
|
|
||||||
"""Iterate comments only."""
|
|
||||||
|
|
||||||
for comment in CommentsMatch(tag).get_comments(limit):
|
|
||||||
yield comment
|
|
||||||
|
|
||||||
def select_one(self, tag):
|
def select_one(self, tag):
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
"""CSS selector parser."""
|
"""CSS selector parser."""
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
import re
|
||||||
|
from functools import lru_cache
|
||||||
from . import util
|
from . import util
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import SelectorSyntaxError
|
from .util import SelectorSyntaxError
|
||||||
|
import warnings
|
||||||
|
|
||||||
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
||||||
|
|
||||||
|
@ -59,6 +60,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
|
||||||
# Complex pseudo classes that take selector lists
|
# Complex pseudo classes that take selector lists
|
||||||
PSEUDO_COMPLEX = {
|
PSEUDO_COMPLEX = {
|
||||||
':contains',
|
':contains',
|
||||||
|
':-soup-contains',
|
||||||
|
':-soup-contains-own',
|
||||||
':has',
|
':has',
|
||||||
':is',
|
':is',
|
||||||
':matches',
|
':matches',
|
||||||
|
@ -117,9 +120,11 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
|
||||||
# Classes (`.class`)
|
# Classes (`.class`)
|
||||||
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
|
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
|
||||||
# Prefix:Tag (`prefix|tag`)
|
# Prefix:Tag (`prefix|tag`)
|
||||||
PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
|
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
|
||||||
# Attributes (`[attr]`, `[attr=value]`, etc.)
|
# Attributes (`[attr]`, `[attr=value]`, etc.)
|
||||||
PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
|
PAT_ATTR = r'''
|
||||||
|
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
|
||||||
|
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
|
||||||
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
|
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
|
||||||
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
|
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
|
||||||
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
|
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
|
||||||
|
@ -196,7 +201,7 @@ FLG_PLACEHOLDER_SHOWN = 0x200
|
||||||
_MAXCACHE = 500
|
_MAXCACHE = 500
|
||||||
|
|
||||||
|
|
||||||
@util.lru_cache(maxsize=_MAXCACHE)
|
@lru_cache(maxsize=_MAXCACHE)
|
||||||
def _cached_css_compile(pattern, namespaces, custom, flags):
|
def _cached_css_compile(pattern, namespaces, custom, flags):
|
||||||
"""Cached CSS compile."""
|
"""Cached CSS compile."""
|
||||||
|
|
||||||
|
@ -245,7 +250,7 @@ def css_unescape(content, string=False):
|
||||||
codepoint = int(m.group(1)[1:], 16)
|
codepoint = int(m.group(1)[1:], 16)
|
||||||
if codepoint == 0:
|
if codepoint == 0:
|
||||||
codepoint = UNICODE_REPLACEMENT_CHAR
|
codepoint = UNICODE_REPLACEMENT_CHAR
|
||||||
value = util.uchr(codepoint)
|
value = chr(codepoint)
|
||||||
elif m.group(2):
|
elif m.group(2):
|
||||||
value = m.group(2)[1:]
|
value = m.group(2)[1:]
|
||||||
elif m.group(3):
|
elif m.group(3):
|
||||||
|
@ -269,7 +274,7 @@ def escape(ident):
|
||||||
string.append('\\{}'.format(ident))
|
string.append('\\{}'.format(ident))
|
||||||
else:
|
else:
|
||||||
for index, c in enumerate(ident):
|
for index, c in enumerate(ident):
|
||||||
codepoint = util.uord(c)
|
codepoint = ord(c)
|
||||||
if codepoint == 0x00:
|
if codepoint == 0x00:
|
||||||
string.append('\ufffd')
|
string.append('\ufffd')
|
||||||
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
|
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
|
||||||
|
@ -300,12 +305,7 @@ class SelectorPattern(object):
|
||||||
|
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
def enabled(self, flags):
|
def match(self, selector, index, flags):
|
||||||
"""Enabled."""
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def match(self, selector, index):
|
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
return self.re_pattern.match(selector, index)
|
return self.re_pattern.match(selector, index)
|
||||||
|
@ -320,7 +320,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
self.patterns = {}
|
self.patterns = {}
|
||||||
for p in patterns:
|
for p in patterns:
|
||||||
name = p[0]
|
name = p[0]
|
||||||
pattern = SelectorPattern(name, p[2])
|
pattern = p[3](name, p[2])
|
||||||
for pseudo in p[1]:
|
for pseudo in p[1]:
|
||||||
self.patterns[pseudo] = pattern
|
self.patterns[pseudo] = pattern
|
||||||
|
|
||||||
|
@ -332,12 +332,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
|
|
||||||
return self.matched_name.get_name()
|
return self.matched_name.get_name()
|
||||||
|
|
||||||
def enabled(self, flags):
|
def match(self, selector, index, flags):
|
||||||
"""Enabled."""
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def match(self, selector, index):
|
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
pseudo = None
|
pseudo = None
|
||||||
|
@ -346,7 +341,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
name = util.lower(css_unescape(m.group('name')))
|
name = util.lower(css_unescape(m.group('name')))
|
||||||
pattern = self.patterns.get(name)
|
pattern = self.patterns.get(name)
|
||||||
if pattern:
|
if pattern:
|
||||||
pseudo = pattern.match(selector, index)
|
pseudo = pattern.match(selector, index, flags)
|
||||||
if pseudo:
|
if pseudo:
|
||||||
self.matched_name = pattern
|
self.matched_name = pattern
|
||||||
|
|
||||||
|
@ -429,11 +424,16 @@ class CSSParser(object):
|
||||||
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
||||||
SpecialPseudoPattern(
|
SpecialPseudoPattern(
|
||||||
(
|
(
|
||||||
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
|
(
|
||||||
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
|
"pseudo_contains",
|
||||||
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
|
(':contains', ':-soup-contains', ':-soup-contains-own'),
|
||||||
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
|
PAT_PSEUDO_CONTAINS,
|
||||||
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
|
SelectorPattern
|
||||||
|
),
|
||||||
|
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
|
||||||
|
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
|
||||||
|
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
|
||||||
|
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
|
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
|
||||||
|
@ -461,15 +461,11 @@ class CSSParser(object):
|
||||||
inverse = False
|
inverse = False
|
||||||
op = m.group('cmp')
|
op = m.group('cmp')
|
||||||
case = util.lower(m.group('case')) if m.group('case') else None
|
case = util.lower(m.group('case')) if m.group('case') else None
|
||||||
parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
|
ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
|
||||||
ns = ''
|
attr = css_unescape(m.group('attr_name'))
|
||||||
is_type = False
|
is_type = False
|
||||||
pattern2 = None
|
pattern2 = None
|
||||||
if len(parts) > 1:
|
|
||||||
ns = parts[0]
|
|
||||||
attr = parts[1]
|
|
||||||
else:
|
|
||||||
attr = parts[0]
|
|
||||||
if case:
|
if case:
|
||||||
flags = re.I if case == 'i' else 0
|
flags = re.I if case == 'i' else 0
|
||||||
elif util.lower(attr) == 'type':
|
elif util.lower(attr) == 'type':
|
||||||
|
@ -532,13 +528,8 @@ class CSSParser(object):
|
||||||
def parse_tag_pattern(self, sel, m, has_selector):
|
def parse_tag_pattern(self, sel, m, has_selector):
|
||||||
"""Parse tag pattern from regex match."""
|
"""Parse tag pattern from regex match."""
|
||||||
|
|
||||||
parts = [css_unescape(x) for x in m.group(0).split('|')]
|
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
|
||||||
if len(parts) > 1:
|
tag = css_unescape(m.group('tag_name'))
|
||||||
prefix = parts[0]
|
|
||||||
tag = parts[1]
|
|
||||||
else:
|
|
||||||
tag = parts[0]
|
|
||||||
prefix = None
|
|
||||||
sel.tag = ct.SelectorTag(tag, prefix)
|
sel.tag = ct.SelectorTag(tag, prefix)
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
@ -817,7 +808,14 @@ class CSSParser(object):
|
||||||
def parse_pseudo_contains(self, sel, m, has_selector):
|
def parse_pseudo_contains(self, sel, m, has_selector):
|
||||||
"""Parse contains."""
|
"""Parse contains."""
|
||||||
|
|
||||||
values = m.group('values')
|
pseudo = util.lower(css_unescape(m.group('name')))
|
||||||
|
if pseudo == ":contains":
|
||||||
|
warnings.warn(
|
||||||
|
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
|
||||||
|
FutureWarning
|
||||||
|
)
|
||||||
|
contains_own = pseudo == ":-soup-contains-own"
|
||||||
|
values = css_unescape(m.group('values'))
|
||||||
patterns = []
|
patterns = []
|
||||||
for token in RE_VALUES.finditer(values):
|
for token in RE_VALUES.finditer(values):
|
||||||
if token.group('split'):
|
if token.group('split'):
|
||||||
|
@ -828,7 +826,7 @@ class CSSParser(object):
|
||||||
else:
|
else:
|
||||||
value = css_unescape(value)
|
value = css_unescape(value)
|
||||||
patterns.append(value)
|
patterns.append(value)
|
||||||
sel.contains.append(ct.SelectorContains(tuple(patterns)))
|
sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
|
@ -918,7 +916,7 @@ class CSSParser(object):
|
||||||
elif key == 'pseudo_class':
|
elif key == 'pseudo_class':
|
||||||
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
|
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
|
||||||
elif key == 'pseudo_element':
|
elif key == 'pseudo_element':
|
||||||
raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
|
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
|
||||||
elif key == 'pseudo_contains':
|
elif key == 'pseudo_contains':
|
||||||
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
|
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
|
||||||
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
|
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
|
||||||
|
@ -1027,9 +1025,7 @@ class CSSParser(object):
|
||||||
while index <= end:
|
while index <= end:
|
||||||
m = None
|
m = None
|
||||||
for v in self.css_tokens:
|
for v in self.css_tokens:
|
||||||
if not v.enabled(self.flags): # pragma: no cover
|
m = v.match(pattern, index, self.flags)
|
||||||
continue
|
|
||||||
m = v.match(pattern, index)
|
|
||||||
if m:
|
if m:
|
||||||
name = v.get_name()
|
name = v.get_name()
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
|
@ -1067,7 +1063,7 @@ class CSSParser(object):
|
||||||
|
|
||||||
# CSS pattern for `:link` and `:any-link`
|
# CSS pattern for `:link` and `:any-link`
|
||||||
CSS_LINK = CSSParser(
|
CSS_LINK = CSSParser(
|
||||||
'html|*:is(a, area, link)[href]'
|
'html|*:is(a, area)[href]'
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:checked`
|
# CSS pattern for `:checked`
|
||||||
CSS_CHECKED = CSSParser(
|
CSS_CHECKED = CSSParser(
|
||||||
|
@ -1098,23 +1094,23 @@ CSS_INDETERMINATE = CSSParser(
|
||||||
This pattern must be at the end.
|
This pattern must be at the end.
|
||||||
Special logic is applied to the last selector.
|
Special logic is applied to the last selector.
|
||||||
*/
|
*/
|
||||||
html|input[type="radio"][name][name!='']:not([checked])
|
html|input[type="radio"][name]:not([name='']):not([checked])
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
||||||
# CSS pattern for `:disabled`
|
# CSS pattern for `:disabled`
|
||||||
CSS_DISABLED = CSSParser(
|
CSS_DISABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
||||||
html|optgroup[disabled] > html|option,
|
html|optgroup[disabled] > html|option,
|
||||||
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
|
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
|
||||||
html|fieldset[disabled] >
|
html|fieldset[disabled] >
|
||||||
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
|
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:enabled`
|
# CSS pattern for `:enabled`
|
||||||
CSS_ENABLED = CSSParser(
|
CSS_ENABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:required`
|
# CSS pattern for `:required`
|
||||||
|
@ -1138,8 +1134,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
|
||||||
[type=email],
|
[type=email],
|
||||||
[type=password],
|
[type=password],
|
||||||
[type=number]
|
[type=number]
|
||||||
)[placeholder][placeholder!='']:is(:not([value]), [value=""]),
|
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
|
||||||
html|textarea[placeholder][placeholder!='']
|
html|textarea[placeholder]:not([placeholder=''])
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
||||||
# CSS pattern default for `:nth-child` "of S" feature
|
# CSS pattern default for `:nth-child` "of S" feature
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
"""CSS selector structure items."""
|
"""CSS selector structure items."""
|
||||||
from __future__ import unicode_literals
|
import copyreg
|
||||||
from . import util
|
from collections.abc import Hashable, Mapping
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'Selector',
|
'Selector',
|
||||||
|
@ -86,21 +86,21 @@ class Immutable(object):
|
||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
|
|
||||||
class ImmutableDict(util.Mapping):
|
class ImmutableDict(Mapping):
|
||||||
"""Hashable, immutable dictionary."""
|
"""Hashable, immutable dictionary."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, arg):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
arg = args[0] if args else kwargs
|
arg
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if (
|
if (
|
||||||
is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
|
is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
|
||||||
not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
|
not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
|
||||||
):
|
):
|
||||||
raise TypeError('All values must be hashable')
|
raise TypeError('All values must be hashable')
|
||||||
|
|
||||||
self._d = dict(*args, **kwargs)
|
self._d = dict(arg)
|
||||||
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
@ -133,39 +133,37 @@ class ImmutableDict(util.Mapping):
|
||||||
class Namespaces(ImmutableDict):
|
class Namespaces(ImmutableDict):
|
||||||
"""Namespaces."""
|
"""Namespaces."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, arg):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
# If there are arguments, check the first index.
|
# If there are arguments, check the first index.
|
||||||
# `super` should fail if the user gave multiple arguments,
|
# `super` should fail if the user gave multiple arguments,
|
||||||
# so don't bother checking that.
|
# so don't bother checking that.
|
||||||
arg = args[0] if args else kwargs
|
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
|
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||||
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
|
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||||
|
|
||||||
super(Namespaces, self).__init__(*args, **kwargs)
|
super(Namespaces, self).__init__(arg)
|
||||||
|
|
||||||
|
|
||||||
class CustomSelectors(ImmutableDict):
|
class CustomSelectors(ImmutableDict):
|
||||||
"""Custom selectors."""
|
"""Custom selectors."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, arg):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
# If there are arguments, check the first index.
|
# If there are arguments, check the first index.
|
||||||
# `super` should fail if the user gave multiple arguments,
|
# `super` should fail if the user gave multiple arguments,
|
||||||
# so don't bother checking that.
|
# so don't bother checking that.
|
||||||
arg = args[0] if args else kwargs
|
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
|
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||||
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
|
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||||
|
|
||||||
super(CustomSelectors, self).__init__(*args, **kwargs)
|
super(CustomSelectors, self).__init__(arg)
|
||||||
|
|
||||||
|
|
||||||
class Selector(Immutable):
|
class Selector(Immutable):
|
||||||
|
@ -239,13 +237,14 @@ class SelectorAttribute(Immutable):
|
||||||
class SelectorContains(Immutable):
|
class SelectorContains(Immutable):
|
||||||
"""Selector contains rule."""
|
"""Selector contains rule."""
|
||||||
|
|
||||||
__slots__ = ("text", "_hash")
|
__slots__ = ("text", "own", "_hash")
|
||||||
|
|
||||||
def __init__(self, text):
|
def __init__(self, text, own):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorContains, self).__init__(
|
super(SelectorContains, self).__init__(
|
||||||
text=text
|
text=text,
|
||||||
|
own=own
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -332,7 +331,7 @@ def _pickle(p):
|
||||||
def pickle_register(obj):
|
def pickle_register(obj):
|
||||||
"""Allow object to be pickled."""
|
"""Allow object to be pickled."""
|
||||||
|
|
||||||
util.copyreg.pickle(obj, _pickle)
|
copyreg.pickle(obj, _pickle)
|
||||||
|
|
||||||
|
|
||||||
pickle_register(Selector)
|
pickle_register(Selector)
|
||||||
|
|
|
@ -1,46 +1,17 @@
|
||||||
"""Utility."""
|
"""Utility."""
|
||||||
from __future__ import unicode_literals
|
from functools import wraps, lru_cache
|
||||||
from functools import wraps
|
|
||||||
import warnings
|
import warnings
|
||||||
import sys
|
|
||||||
import struct
|
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
MODULE = os.path.dirname(__file__)
|
|
||||||
|
|
||||||
PY3 = sys.version_info >= (3, 0)
|
|
||||||
PY35 = sys.version_info >= (3, 5)
|
|
||||||
PY37 = sys.version_info >= (3, 7)
|
|
||||||
|
|
||||||
if PY3:
|
|
||||||
from functools import lru_cache # noqa F401
|
|
||||||
import copyreg # noqa F401
|
|
||||||
from collections.abc import Hashable, Mapping # noqa F401
|
|
||||||
|
|
||||||
ustr = str
|
|
||||||
bstr = bytes
|
|
||||||
unichar = chr
|
|
||||||
string = str
|
|
||||||
else:
|
|
||||||
from backports.functools_lru_cache import lru_cache # noqa F401
|
|
||||||
import copy_reg as copyreg # noqa F401
|
|
||||||
from collections import Hashable, Mapping # noqa F401
|
|
||||||
|
|
||||||
ustr = unicode # noqa: F821
|
|
||||||
bstr = str
|
|
||||||
unichar = unichr # noqa: F821
|
|
||||||
string = basestring # noqa: F821
|
|
||||||
|
|
||||||
DEBUG = 0x00001
|
DEBUG = 0x00001
|
||||||
|
|
||||||
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
|
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
|
||||||
|
|
||||||
LC_A = ord('a')
|
|
||||||
LC_Z = ord('z')
|
|
||||||
UC_A = ord('A')
|
UC_A = ord('A')
|
||||||
UC_Z = ord('Z')
|
UC_Z = ord('Z')
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=512)
|
||||||
def lower(string):
|
def lower(string):
|
||||||
"""Lower."""
|
"""Lower."""
|
||||||
|
|
||||||
|
@ -51,38 +22,7 @@ def lower(string):
|
||||||
return ''.join(new_string)
|
return ''.join(new_string)
|
||||||
|
|
||||||
|
|
||||||
def upper(string): # pragma: no cover
|
class SelectorSyntaxError(Exception):
|
||||||
"""Lower."""
|
|
||||||
|
|
||||||
new_string = []
|
|
||||||
for c in string:
|
|
||||||
o = ord(c)
|
|
||||||
new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
|
|
||||||
return ''.join(new_string)
|
|
||||||
|
|
||||||
|
|
||||||
def uchr(i):
|
|
||||||
"""Allow getting Unicode character on narrow python builds."""
|
|
||||||
|
|
||||||
try:
|
|
||||||
return unichar(i)
|
|
||||||
except ValueError: # pragma: no cover
|
|
||||||
return struct.pack('i', i).decode('utf-32')
|
|
||||||
|
|
||||||
|
|
||||||
def uord(c):
|
|
||||||
"""Get Unicode ordinal."""
|
|
||||||
|
|
||||||
if len(c) == 2: # pragma: no cover
|
|
||||||
high, low = [ord(p) for p in c]
|
|
||||||
ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
|
|
||||||
else:
|
|
||||||
ordinal = ord(c)
|
|
||||||
|
|
||||||
return ordinal
|
|
||||||
|
|
||||||
|
|
||||||
class SelectorSyntaxError(SyntaxError):
|
|
||||||
"""Syntax error in a CSS selector."""
|
"""Syntax error in a CSS selector."""
|
||||||
|
|
||||||
def __init__(self, msg, pattern=None, index=None):
|
def __init__(self, msg, pattern=None, index=None):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue