mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-07-08 06:00:51 -07:00
Update soupsieve==2.3.1
This commit is contained in:
parent
dcfd8abddd
commit
36b55398a8
8 changed files with 791 additions and 375 deletions
|
@ -2,11 +2,10 @@
|
|||
from datetime import datetime
|
||||
from . import util
|
||||
import re
|
||||
from .import css_types as ct
|
||||
from . import css_types as ct
|
||||
import unicodedata
|
||||
from collections.abc import Sequence
|
||||
|
||||
import bs4
|
||||
import bs4 # type: ignore[import]
|
||||
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
|
||||
|
||||
# Empty tag pattern (whitespace okay)
|
||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||
|
@ -56,7 +55,7 @@ FEB_LEAP_MONTH = 29
|
|||
DAYS_IN_WEEK = 7
|
||||
|
||||
|
||||
class _FakeParent(object):
|
||||
class _FakeParent:
|
||||
"""
|
||||
Fake parent class.
|
||||
|
||||
|
@ -65,22 +64,22 @@ class _FakeParent(object):
|
|||
fake parent so we can traverse the root element as a child.
|
||||
"""
|
||||
|
||||
def __init__(self, element):
|
||||
def __init__(self, element: 'bs4.Tag') -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.contents = [element]
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> 'bs4.PageElement':
|
||||
"""Length."""
|
||||
|
||||
return len(self.contents)
|
||||
|
||||
|
||||
class _DocumentNav(object):
|
||||
class _DocumentNav:
|
||||
"""Navigate a Beautiful Soup document."""
|
||||
|
||||
@classmethod
|
||||
def assert_valid_input(cls, tag):
|
||||
def assert_valid_input(cls, tag: Any) -> None:
|
||||
"""Check if valid input tag or document."""
|
||||
|
||||
# Fail on unexpected types.
|
||||
|
@ -88,64 +87,67 @@ class _DocumentNav(object):
|
|||
raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag)))
|
||||
|
||||
@staticmethod
|
||||
def is_doc(obj):
|
||||
def is_doc(obj: 'bs4.Tag') -> bool:
|
||||
"""Is `BeautifulSoup` object."""
|
||||
return isinstance(obj, bs4.BeautifulSoup)
|
||||
|
||||
@staticmethod
|
||||
def is_tag(obj):
|
||||
def is_tag(obj: 'bs4.PageElement') -> bool:
|
||||
"""Is tag."""
|
||||
return isinstance(obj, bs4.Tag)
|
||||
|
||||
@staticmethod
|
||||
def is_declaration(obj): # pragma: no cover
|
||||
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
||||
"""Is declaration."""
|
||||
return isinstance(obj, bs4.Declaration)
|
||||
|
||||
@staticmethod
|
||||
def is_cdata(obj):
|
||||
def is_cdata(obj: 'bs4.PageElement') -> bool:
|
||||
"""Is CDATA."""
|
||||
return isinstance(obj, bs4.CData)
|
||||
|
||||
@staticmethod
|
||||
def is_processing_instruction(obj): # pragma: no cover
|
||||
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
|
||||
"""Is processing instruction."""
|
||||
return isinstance(obj, bs4.ProcessingInstruction)
|
||||
|
||||
@staticmethod
|
||||
def is_navigable_string(obj):
|
||||
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
|
||||
"""Is navigable string."""
|
||||
return isinstance(obj, bs4.NavigableString)
|
||||
|
||||
@staticmethod
|
||||
def is_special_string(obj):
|
||||
def is_special_string(obj: 'bs4.PageElement') -> bool:
|
||||
"""Is special string."""
|
||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
||||
|
||||
@classmethod
|
||||
def is_content_string(cls, obj):
|
||||
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
|
||||
"""Check if node is content string."""
|
||||
|
||||
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
|
||||
|
||||
@staticmethod
|
||||
def create_fake_parent(el):
|
||||
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
|
||||
"""Create fake parent for a given element."""
|
||||
|
||||
return _FakeParent(el)
|
||||
|
||||
@staticmethod
|
||||
def is_xml_tree(el):
|
||||
def is_xml_tree(el: 'bs4.Tag') -> bool:
|
||||
"""Check if element (or document) is from a XML tree."""
|
||||
|
||||
return el._is_xml
|
||||
return bool(el._is_xml)
|
||||
|
||||
def is_iframe(self, el):
|
||||
def is_iframe(self, el: 'bs4.Tag') -> bool:
|
||||
"""Check if element is an `iframe`."""
|
||||
|
||||
return ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and self.is_html_tag(el)
|
||||
return bool(
|
||||
((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
|
||||
self.is_html_tag(el) # type: ignore[attr-defined]
|
||||
)
|
||||
|
||||
def is_root(self, el):
|
||||
def is_root(self, el: 'bs4.Tag') -> bool:
|
||||
"""
|
||||
Return whether element is a root element.
|
||||
|
||||
|
@ -153,19 +155,26 @@ class _DocumentNav(object):
|
|||
and we check if it is the root element under an `iframe`.
|
||||
"""
|
||||
|
||||
root = self.root and self.root is el
|
||||
root = self.root and self.root is el # type: ignore[attr-defined]
|
||||
if not root:
|
||||
parent = self.get_parent(el)
|
||||
root = parent is not None and self.is_html and self.is_iframe(parent)
|
||||
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
|
||||
return root
|
||||
|
||||
def get_contents(self, el, no_iframe=False):
|
||||
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
|
||||
"""Get contents or contents in reverse."""
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
for content in el.contents:
|
||||
yield content
|
||||
|
||||
def get_children(self, el, start=None, reverse=False, tags=True, no_iframe=False):
|
||||
def get_children(
|
||||
self,
|
||||
el: 'bs4.Tag',
|
||||
start: Optional[int] = None,
|
||||
reverse: bool = False,
|
||||
tags: bool = True,
|
||||
no_iframe: bool = False
|
||||
) -> Iterator['bs4.PageElement']:
|
||||
"""Get children."""
|
||||
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
|
@ -184,7 +193,12 @@ class _DocumentNav(object):
|
|||
if not tags or self.is_tag(node):
|
||||
yield node
|
||||
|
||||
def get_descendants(self, el, tags=True, no_iframe=False):
|
||||
def get_descendants(
|
||||
self,
|
||||
el: 'bs4.Tag',
|
||||
tags: bool = True,
|
||||
no_iframe: bool = False
|
||||
) -> Iterator['bs4.PageElement']:
|
||||
"""Get descendants."""
|
||||
|
||||
if not no_iframe or not self.is_iframe(el):
|
||||
|
@ -215,7 +229,7 @@ class _DocumentNav(object):
|
|||
if not tags or is_tag:
|
||||
yield child
|
||||
|
||||
def get_parent(self, el, no_iframe=False):
|
||||
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
|
||||
"""Get parent."""
|
||||
|
||||
parent = el.parent
|
||||
|
@ -224,25 +238,25 @@ class _DocumentNav(object):
|
|||
return parent
|
||||
|
||||
@staticmethod
|
||||
def get_tag_name(el):
|
||||
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
|
||||
"""Get tag."""
|
||||
|
||||
return el.name
|
||||
return cast(Optional[str], el.name)
|
||||
|
||||
@staticmethod
|
||||
def get_prefix_name(el):
|
||||
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
|
||||
"""Get prefix."""
|
||||
|
||||
return el.prefix
|
||||
return cast(Optional[str], el.prefix)
|
||||
|
||||
@staticmethod
|
||||
def get_uri(el):
|
||||
def get_uri(el: 'bs4.Tag') -> Optional[str]:
|
||||
"""Get namespace `URI`."""
|
||||
|
||||
return el.namespace
|
||||
return cast(Optional[str], el.namespace)
|
||||
|
||||
@classmethod
|
||||
def get_next(cls, el, tags=True):
|
||||
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
|
||||
"""Get next sibling tag."""
|
||||
|
||||
sibling = el.next_sibling
|
||||
|
@ -251,7 +265,7 @@ class _DocumentNav(object):
|
|||
return sibling
|
||||
|
||||
@classmethod
|
||||
def get_previous(cls, el, tags=True):
|
||||
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
|
||||
"""Get previous sibling tag."""
|
||||
|
||||
sibling = el.previous_sibling
|
||||
|
@ -260,7 +274,7 @@ class _DocumentNav(object):
|
|||
return sibling
|
||||
|
||||
@staticmethod
|
||||
def has_html_ns(el):
|
||||
def has_html_ns(el: 'bs4.Tag') -> bool:
|
||||
"""
|
||||
Check if element has an HTML namespace.
|
||||
|
||||
|
@ -269,16 +283,16 @@ class _DocumentNav(object):
|
|||
"""
|
||||
|
||||
ns = getattr(el, 'namespace') if el else None
|
||||
return ns and ns == NS_XHTML
|
||||
return bool(ns and ns == NS_XHTML)
|
||||
|
||||
@staticmethod
|
||||
def split_namespace(el, attr_name):
|
||||
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""Return namespace and attribute name without the prefix."""
|
||||
|
||||
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
|
||||
|
||||
@classmethod
|
||||
def normalize_value(cls, value):
|
||||
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
|
||||
"""Normalize the value to be a string or list of strings."""
|
||||
|
||||
# Treat `None` as empty string.
|
||||
|
@ -297,20 +311,26 @@ class _DocumentNav(object):
|
|||
if isinstance(value, Sequence):
|
||||
new_value = []
|
||||
for v in value:
|
||||
if isinstance(v, Sequence):
|
||||
# This is most certainly a user error and will crash and burn later,
|
||||
# but to avoid excessive recursion, kick out now.
|
||||
new_value.append(v)
|
||||
if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
|
||||
# This is most certainly a user error and will crash and burn later.
|
||||
# To keep things working, we'll do what we do with all objects,
|
||||
# And convert them to strings.
|
||||
new_value.append(str(v))
|
||||
else:
|
||||
# Convert the child to a string
|
||||
new_value.append(cls.normalize_value(v))
|
||||
new_value.append(cast(str, cls.normalize_value(v)))
|
||||
return new_value
|
||||
|
||||
# Try and make anything else a string
|
||||
return str(value)
|
||||
|
||||
@classmethod
|
||||
def get_attribute_by_name(cls, el, name, default=None):
|
||||
def get_attribute_by_name(
|
||||
cls,
|
||||
el: 'bs4.Tag',
|
||||
name: str,
|
||||
default: Optional[Union[str, Sequence[str]]] = None
|
||||
) -> Optional[Union[str, Sequence[str]]]:
|
||||
"""Get attribute by name."""
|
||||
|
||||
value = default
|
||||
|
@ -327,39 +347,39 @@ class _DocumentNav(object):
|
|||
return value
|
||||
|
||||
@classmethod
|
||||
def iter_attributes(cls, el):
|
||||
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
|
||||
"""Iterate attributes."""
|
||||
|
||||
for k, v in el.attrs.items():
|
||||
yield k, cls.normalize_value(v)
|
||||
|
||||
@classmethod
|
||||
def get_classes(cls, el):
|
||||
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
|
||||
"""Get classes."""
|
||||
|
||||
classes = cls.get_attribute_by_name(el, 'class', [])
|
||||
if isinstance(classes, str):
|
||||
classes = RE_NOT_WS.findall(classes)
|
||||
return classes
|
||||
return cast(Sequence[str], classes)
|
||||
|
||||
def get_text(self, el, no_iframe=False):
|
||||
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
|
||||
"""Get text."""
|
||||
|
||||
return ''.join(
|
||||
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||
)
|
||||
|
||||
def get_own_text(self, el, no_iframe=False):
|
||||
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
|
||||
"""Get Own Text."""
|
||||
|
||||
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||
|
||||
|
||||
class Inputs(object):
|
||||
class Inputs:
|
||||
"""Class for parsing and validating input items."""
|
||||
|
||||
@staticmethod
|
||||
def validate_day(year, month, day):
|
||||
def validate_day(year: int, month: int, day: int) -> bool:
|
||||
"""Validate day."""
|
||||
|
||||
max_days = LONG_MONTH
|
||||
|
@ -370,7 +390,7 @@ class Inputs(object):
|
|||
return 1 <= day <= max_days
|
||||
|
||||
@staticmethod
|
||||
def validate_week(year, week):
|
||||
def validate_week(year: int, week: int) -> bool:
|
||||
"""Validate week."""
|
||||
|
||||
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
|
||||
|
@ -379,34 +399,36 @@ class Inputs(object):
|
|||
return 1 <= week <= max_week
|
||||
|
||||
@staticmethod
|
||||
def validate_month(month):
|
||||
def validate_month(month: int) -> bool:
|
||||
"""Validate month."""
|
||||
|
||||
return 1 <= month <= 12
|
||||
|
||||
@staticmethod
|
||||
def validate_year(year):
|
||||
def validate_year(year: int) -> bool:
|
||||
"""Validate year."""
|
||||
|
||||
return 1 <= year
|
||||
|
||||
@staticmethod
|
||||
def validate_hour(hour):
|
||||
def validate_hour(hour: int) -> bool:
|
||||
"""Validate hour."""
|
||||
|
||||
return 0 <= hour <= 23
|
||||
|
||||
@staticmethod
|
||||
def validate_minutes(minutes):
|
||||
def validate_minutes(minutes: int) -> bool:
|
||||
"""Validate minutes."""
|
||||
|
||||
return 0 <= minutes <= 59
|
||||
|
||||
@classmethod
|
||||
def parse_value(cls, itype, value):
|
||||
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
|
||||
"""Parse the input value."""
|
||||
|
||||
parsed = None
|
||||
parsed = None # type: Optional[Tuple[float, ...]]
|
||||
if value is None:
|
||||
return value
|
||||
if itype == "date":
|
||||
m = RE_DATE.match(value)
|
||||
if m:
|
||||
|
@ -452,23 +474,29 @@ class Inputs(object):
|
|||
elif itype in ("number", "range"):
|
||||
m = RE_NUM.match(value)
|
||||
if m:
|
||||
parsed = float(m.group('value'))
|
||||
parsed = (float(m.group('value')),)
|
||||
return parsed
|
||||
|
||||
|
||||
class _Match(object):
|
||||
class CSSMatch(_DocumentNav):
|
||||
"""Perform CSS matching."""
|
||||
|
||||
def __init__(self, selectors, scope, namespaces, flags):
|
||||
def __init__(
|
||||
self,
|
||||
selectors: ct.SelectorList,
|
||||
scope: 'bs4.Tag',
|
||||
namespaces: Optional[ct.Namespaces],
|
||||
flags: int
|
||||
) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.assert_valid_input(scope)
|
||||
self.tag = scope
|
||||
self.cached_meta_lang = []
|
||||
self.cached_default_forms = []
|
||||
self.cached_indeterminate_forms = []
|
||||
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
|
||||
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
|
||||
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
|
||||
self.selectors = selectors
|
||||
self.namespaces = {} if namespaces is None else namespaces
|
||||
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
|
||||
self.flags = flags
|
||||
self.iframe_restrict = False
|
||||
|
||||
|
@ -494,12 +522,12 @@ class _Match(object):
|
|||
self.is_xml = self.is_xml_tree(doc)
|
||||
self.is_html = not self.is_xml or self.has_html_namespace
|
||||
|
||||
def supports_namespaces(self):
|
||||
def supports_namespaces(self) -> bool:
|
||||
"""Check if namespaces are supported in the HTML type."""
|
||||
|
||||
return self.is_xml or self.has_html_namespace
|
||||
|
||||
def get_tag_ns(self, el):
|
||||
def get_tag_ns(self, el: 'bs4.Tag') -> str:
|
||||
"""Get tag namespace."""
|
||||
|
||||
if self.supports_namespaces():
|
||||
|
@ -511,24 +539,24 @@ class _Match(object):
|
|||
namespace = NS_XHTML
|
||||
return namespace
|
||||
|
||||
def is_html_tag(self, el):
|
||||
def is_html_tag(self, el: 'bs4.Tag') -> bool:
|
||||
"""Check if tag is in HTML namespace."""
|
||||
|
||||
return self.get_tag_ns(el) == NS_XHTML
|
||||
|
||||
def get_tag(self, el):
|
||||
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
|
||||
"""Get tag."""
|
||||
|
||||
name = self.get_tag_name(el)
|
||||
return util.lower(name) if name is not None and not self.is_xml else name
|
||||
|
||||
def get_prefix(self, el):
|
||||
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
|
||||
"""Get prefix."""
|
||||
|
||||
prefix = self.get_prefix_name(el)
|
||||
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
|
||||
|
||||
def find_bidi(self, el):
|
||||
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
|
||||
"""Get directionality from element text."""
|
||||
|
||||
for node in self.get_children(el, tags=False):
|
||||
|
@ -564,7 +592,7 @@ class _Match(object):
|
|||
return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
|
||||
return None
|
||||
|
||||
def extended_language_filter(self, lang_range, lang_tag):
|
||||
def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
|
||||
"""Filter the language tags."""
|
||||
|
||||
match = True
|
||||
|
@ -615,7 +643,12 @@ class _Match(object):
|
|||
|
||||
return match
|
||||
|
||||
def match_attribute_name(self, el, attr, prefix):
|
||||
def match_attribute_name(
|
||||
self,
|
||||
el: 'bs4.Tag',
|
||||
attr: str,
|
||||
prefix: Optional[str]
|
||||
) -> Optional[Union[str, Sequence[str]]]:
|
||||
"""Match attribute name and return value if it exists."""
|
||||
|
||||
value = None
|
||||
|
@ -663,13 +696,13 @@ class _Match(object):
|
|||
break
|
||||
return value
|
||||
|
||||
def match_namespace(self, el, tag):
|
||||
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
|
||||
"""Match the namespace of the element."""
|
||||
|
||||
match = True
|
||||
namespace = self.get_tag_ns(el)
|
||||
default_namespace = self.namespaces.get('')
|
||||
tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix, None)
|
||||
tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
|
||||
# We must match the default namespace if one is not provided
|
||||
if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
|
||||
match = False
|
||||
|
@ -684,27 +717,26 @@ class _Match(object):
|
|||
match = False
|
||||
return match
|
||||
|
||||
def match_attributes(self, el, attributes):
|
||||
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
|
||||
"""Match attributes."""
|
||||
|
||||
match = True
|
||||
if attributes:
|
||||
for a in attributes:
|
||||
value = self.match_attribute_name(el, a.attribute, a.prefix)
|
||||
temp = self.match_attribute_name(el, a.attribute, a.prefix)
|
||||
pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
|
||||
if isinstance(value, list):
|
||||
value = ' '.join(value)
|
||||
if value is None:
|
||||
if temp is None:
|
||||
match = False
|
||||
break
|
||||
elif pattern is None:
|
||||
value = temp if isinstance(temp, str) else ' '.join(temp)
|
||||
if pattern is None:
|
||||
continue
|
||||
elif pattern.match(value) is None:
|
||||
match = False
|
||||
break
|
||||
return match
|
||||
|
||||
def match_tagname(self, el, tag):
|
||||
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
|
||||
"""Match tag name."""
|
||||
|
||||
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
|
||||
|
@ -713,7 +745,7 @@ class _Match(object):
|
|||
name not in (self.get_tag(el), '*')
|
||||
)
|
||||
|
||||
def match_tag(self, el, tag):
|
||||
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
|
||||
"""Match the tag."""
|
||||
|
||||
match = True
|
||||
|
@ -725,10 +757,14 @@ class _Match(object):
|
|||
match = False
|
||||
return match
|
||||
|
||||
def match_past_relations(self, el, relation):
|
||||
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
||||
"""Match past relationship."""
|
||||
|
||||
found = False
|
||||
# I don't think this can ever happen, but it makes `mypy` happy
|
||||
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
|
||||
return found
|
||||
|
||||
if relation[0].rel_type == REL_PARENT:
|
||||
parent = self.get_parent(el, no_iframe=self.iframe_restrict)
|
||||
while not found and parent:
|
||||
|
@ -749,21 +785,28 @@ class _Match(object):
|
|||
found = self.match_selectors(sibling, relation)
|
||||
return found
|
||||
|
||||
def match_future_child(self, parent, relation, recursive=False):
|
||||
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
|
||||
"""Match future child."""
|
||||
|
||||
match = False
|
||||
children = self.get_descendants if recursive else self.get_children
|
||||
if recursive:
|
||||
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
|
||||
else:
|
||||
children = self.get_children
|
||||
for child in children(parent, no_iframe=self.iframe_restrict):
|
||||
match = self.match_selectors(child, relation)
|
||||
if match:
|
||||
break
|
||||
return match
|
||||
|
||||
def match_future_relations(self, el, relation):
|
||||
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
||||
"""Match future relationship."""
|
||||
|
||||
found = False
|
||||
# I don't think this can ever happen, but it makes `mypy` happy
|
||||
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
|
||||
return found
|
||||
|
||||
if relation[0].rel_type == REL_HAS_PARENT:
|
||||
found = self.match_future_child(el, relation, True)
|
||||
elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
|
||||
|
@ -779,11 +822,14 @@ class _Match(object):
|
|||
found = self.match_selectors(sibling, relation)
|
||||
return found
|
||||
|
||||
def match_relations(self, el, relation):
|
||||
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
|
||||
"""Match relationship to other elements."""
|
||||
|
||||
found = False
|
||||
|
||||
if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
|
||||
return found
|
||||
|
||||
if relation[0].rel_type.startswith(':'):
|
||||
found = self.match_future_relations(el, relation)
|
||||
else:
|
||||
|
@ -791,7 +837,7 @@ class _Match(object):
|
|||
|
||||
return found
|
||||
|
||||
def match_id(self, el, ids):
|
||||
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
|
||||
"""Match element's ID."""
|
||||
|
||||
found = True
|
||||
|
@ -801,7 +847,7 @@ class _Match(object):
|
|||
break
|
||||
return found
|
||||
|
||||
def match_classes(self, el, classes):
|
||||
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
|
||||
"""Match element's classes."""
|
||||
|
||||
current_classes = self.get_classes(el)
|
||||
|
@ -812,7 +858,7 @@ class _Match(object):
|
|||
break
|
||||
return found
|
||||
|
||||
def match_root(self, el):
|
||||
def match_root(self, el: 'bs4.Tag') -> bool:
|
||||
"""Match element as root."""
|
||||
|
||||
is_root = self.is_root(el)
|
||||
|
@ -838,12 +884,12 @@ class _Match(object):
|
|||
sibling = self.get_next(sibling, tags=False)
|
||||
return is_root
|
||||
|
||||
def match_scope(self, el):
|
||||
def match_scope(self, el: 'bs4.Tag') -> bool:
|
||||
"""Match element as scope."""
|
||||
|
||||
return self.scope is el
|
||||
|
||||
def match_nth_tag_type(self, el, child):
|
||||
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
|
||||
"""Match tag type for `nth` matches."""
|
||||
|
||||
return(
|
||||
|
@ -851,7 +897,7 @@ class _Match(object):
|
|||
(self.get_tag_ns(child) == self.get_tag_ns(el))
|
||||
)
|
||||
|
||||
def match_nth(self, el, nth):
|
||||
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
|
||||
"""Match `nth` elements."""
|
||||
|
||||
matched = True
|
||||
|
@ -952,7 +998,7 @@ class _Match(object):
|
|||
break
|
||||
return matched
|
||||
|
||||
def match_empty(self, el):
|
||||
def match_empty(self, el: 'bs4.Tag') -> bool:
|
||||
"""Check if element is empty (if requested)."""
|
||||
|
||||
is_empty = True
|
||||
|
@ -965,7 +1011,7 @@ class _Match(object):
|
|||
break
|
||||
return is_empty
|
||||
|
||||
def match_subselectors(self, el, selectors):
|
||||
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
|
||||
"""Match selectors."""
|
||||
|
||||
match = True
|
||||
|
@ -974,11 +1020,11 @@ class _Match(object):
|
|||
match = False
|
||||
return match
|
||||
|
||||
def match_contains(self, el, contains):
|
||||
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
|
||||
"""Match element if it contains text."""
|
||||
|
||||
match = True
|
||||
content = None
|
||||
content = None # type: Optional[Union[str, Sequence[str]]]
|
||||
for contain_list in contains:
|
||||
if content is None:
|
||||
if contain_list.own:
|
||||
|
@ -1002,7 +1048,7 @@ class _Match(object):
|
|||
match = False
|
||||
return match
|
||||
|
||||
def match_default(self, el):
|
||||
def match_default(self, el: 'bs4.Tag') -> bool:
|
||||
"""Match default."""
|
||||
|
||||
match = False
|
||||
|
@ -1035,19 +1081,19 @@ class _Match(object):
|
|||
if name in ('input', 'button'):
|
||||
v = self.get_attribute_by_name(child, 'type', '')
|
||||
if v and util.lower(v) == 'submit':
|
||||
self.cached_default_forms.append([form, child])
|
||||
self.cached_default_forms.append((form, child))
|
||||
if el is child:
|
||||
match = True
|
||||
break
|
||||
return match
|
||||
|
||||
def match_indeterminate(self, el):
|
||||
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
|
||||
"""Match default."""
|
||||
|
||||
match = False
|
||||
name = self.get_attribute_by_name(el, 'name')
|
||||
name = cast(str, self.get_attribute_by_name(el, 'name'))
|
||||
|
||||
def get_parent_form(el):
|
||||
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
|
||||
"""Find this input's form."""
|
||||
form = None
|
||||
parent = self.get_parent(el, no_iframe=True)
|
||||
|
@ -1098,11 +1144,11 @@ class _Match(object):
|
|||
break
|
||||
if not checked:
|
||||
match = True
|
||||
self.cached_indeterminate_forms.append([form, name, match])
|
||||
self.cached_indeterminate_forms.append((form, name, match))
|
||||
|
||||
return match
|
||||
|
||||
def match_lang(self, el, langs):
|
||||
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
|
||||
"""Match languages."""
|
||||
|
||||
match = False
|
||||
|
@ -1169,26 +1215,26 @@ class _Match(object):
|
|||
content = v
|
||||
if c_lang and content:
|
||||
found_lang = content
|
||||
self.cached_meta_lang.append((root, found_lang))
|
||||
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
|
||||
break
|
||||
if found_lang:
|
||||
break
|
||||
if not found_lang:
|
||||
self.cached_meta_lang.append((root, False))
|
||||
self.cached_meta_lang.append((cast(str, root), ''))
|
||||
|
||||
# If we determined a language, compare.
|
||||
if found_lang:
|
||||
for patterns in langs:
|
||||
match = False
|
||||
for pattern in patterns:
|
||||
if self.extended_language_filter(pattern, found_lang):
|
||||
if self.extended_language_filter(pattern, cast(str, found_lang)):
|
||||
match = True
|
||||
if not match:
|
||||
break
|
||||
|
||||
return match
|
||||
|
||||
def match_dir(self, el, directionality):
|
||||
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
|
||||
"""Check directionality."""
|
||||
|
||||
# If we have to match both left and right, we can't match either.
|
||||
|
@ -1220,13 +1266,13 @@ class _Match(object):
|
|||
# Auto handling for text inputs
|
||||
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
|
||||
if is_textarea:
|
||||
value = []
|
||||
temp = []
|
||||
for node in self.get_contents(el, no_iframe=True):
|
||||
if self.is_content_string(node):
|
||||
value.append(node)
|
||||
value = ''.join(value)
|
||||
temp.append(node)
|
||||
value = ''.join(temp)
|
||||
else:
|
||||
value = self.get_attribute_by_name(el, 'value', '')
|
||||
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
|
||||
if value:
|
||||
for c in value:
|
||||
bidi = unicodedata.bidirectional(c)
|
||||
|
@ -1251,7 +1297,7 @@ class _Match(object):
|
|||
# Match parents direction
|
||||
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
|
||||
|
||||
def match_range(self, el, condition):
|
||||
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
|
||||
"""
|
||||
Match range.
|
||||
|
||||
|
@ -1264,20 +1310,14 @@ class _Match(object):
|
|||
out_of_range = False
|
||||
|
||||
itype = util.lower(self.get_attribute_by_name(el, 'type'))
|
||||
mn = self.get_attribute_by_name(el, 'min', None)
|
||||
if mn is not None:
|
||||
mn = Inputs.parse_value(itype, mn)
|
||||
mx = self.get_attribute_by_name(el, 'max', None)
|
||||
if mx is not None:
|
||||
mx = Inputs.parse_value(itype, mx)
|
||||
mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
|
||||
mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
|
||||
|
||||
# There is no valid min or max, so we cannot evaluate a range
|
||||
if mn is None and mx is None:
|
||||
return False
|
||||
|
||||
value = self.get_attribute_by_name(el, 'value', None)
|
||||
if value is not None:
|
||||
value = Inputs.parse_value(itype, value)
|
||||
value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
|
||||
if value is not None:
|
||||
if itype in ("date", "datetime-local", "month", "week", "number", "range"):
|
||||
if mn is not None and value < mn:
|
||||
|
@ -1297,7 +1337,7 @@ class _Match(object):
|
|||
|
||||
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
|
||||
|
||||
def match_defined(self, el):
|
||||
def match_defined(self, el: 'bs4.Tag') -> bool:
|
||||
"""
|
||||
Match defined.
|
||||
|
||||
|
@ -1313,12 +1353,14 @@ class _Match(object):
|
|||
|
||||
name = self.get_tag(el)
|
||||
return (
|
||||
name.find('-') == -1 or
|
||||
name.find(':') != -1 or
|
||||
self.get_prefix(el) is not None
|
||||
name is not None and (
|
||||
name.find('-') == -1 or
|
||||
name.find(':') != -1 or
|
||||
self.get_prefix(el) is not None
|
||||
)
|
||||
)
|
||||
|
||||
def match_placeholder_shown(self, el):
|
||||
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
|
||||
"""
|
||||
Match placeholder shown according to HTML spec.
|
||||
|
||||
|
@ -1333,7 +1375,7 @@ class _Match(object):
|
|||
|
||||
return match
|
||||
|
||||
def match_selectors(self, el, selectors):
|
||||
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
|
||||
"""Check if element matches one of the selectors."""
|
||||
|
||||
match = False
|
||||
|
@ -1405,7 +1447,7 @@ class _Match(object):
|
|||
if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
|
||||
continue
|
||||
# Validate that the tag contains the specified text.
|
||||
if not self.match_contains(el, selector.contains):
|
||||
if selector.contains and not self.match_contains(el, selector.contains):
|
||||
continue
|
||||
match = not is_not
|
||||
break
|
||||
|
@ -1417,21 +1459,20 @@ class _Match(object):
|
|||
|
||||
return match
|
||||
|
||||
def select(self, limit=0):
|
||||
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
|
||||
"""Match all tags under the targeted tag."""
|
||||
|
||||
if limit < 1:
|
||||
limit = None
|
||||
lim = None if limit < 1 else limit
|
||||
|
||||
for child in self.get_descendants(self.tag):
|
||||
if self.match(child):
|
||||
yield child
|
||||
if limit is not None:
|
||||
limit -= 1
|
||||
if limit < 1:
|
||||
if lim is not None:
|
||||
lim -= 1
|
||||
if lim < 1:
|
||||
break
|
||||
|
||||
def closest(self):
|
||||
def closest(self) -> Optional['bs4.Tag']:
|
||||
"""Match closest ancestor."""
|
||||
|
||||
current = self.tag
|
||||
|
@ -1443,30 +1484,39 @@ class _Match(object):
|
|||
current = self.get_parent(current)
|
||||
return closest
|
||||
|
||||
def filter(self): # noqa A001
|
||||
def filter(self) -> List['bs4.Tag']: # noqa A001
|
||||
"""Filter tag's children."""
|
||||
|
||||
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
|
||||
|
||||
def match(self, el):
|
||||
def match(self, el: 'bs4.Tag') -> bool:
|
||||
"""Match."""
|
||||
|
||||
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
|
||||
|
||||
|
||||
class CSSMatch(_DocumentNav, _Match):
|
||||
"""The Beautiful Soup CSS match class."""
|
||||
|
||||
|
||||
class SoupSieve(ct.Immutable):
|
||||
"""Compiled Soup Sieve selector matching object."""
|
||||
|
||||
pattern: str
|
||||
selectors: ct.SelectorList
|
||||
namespaces: Optional[ct.Namespaces]
|
||||
custom: Dict[str, str]
|
||||
flags: int
|
||||
|
||||
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
|
||||
|
||||
def __init__(self, pattern, selectors, namespaces, custom, flags):
|
||||
def __init__(
|
||||
self,
|
||||
pattern: str,
|
||||
selectors: ct.SelectorList,
|
||||
namespaces: Optional[ct.Namespaces],
|
||||
custom: Optional[ct.CustomSelectors],
|
||||
flags: int
|
||||
):
|
||||
"""Initialize."""
|
||||
|
||||
super(SoupSieve, self).__init__(
|
||||
super().__init__(
|
||||
pattern=pattern,
|
||||
selectors=selectors,
|
||||
namespaces=namespaces,
|
||||
|
@ -1474,17 +1524,17 @@ class SoupSieve(ct.Immutable):
|
|||
flags=flags
|
||||
)
|
||||
|
||||
def match(self, tag):
|
||||
def match(self, tag: 'bs4.Tag') -> bool:
|
||||
"""Match."""
|
||||
|
||||
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
|
||||
|
||||
def closest(self, tag):
|
||||
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
|
||||
"""Match closest ancestor."""
|
||||
|
||||
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
|
||||
|
||||
def filter(self, iterable): # noqa A001
|
||||
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
|
||||
"""
|
||||
Filter.
|
||||
|
||||
|
@ -1501,24 +1551,24 @@ class SoupSieve(ct.Immutable):
|
|||
else:
|
||||
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
||||
|
||||
def select_one(self, tag):
|
||||
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
|
||||
"""Select a single tag."""
|
||||
|
||||
tags = self.select(tag, limit=1)
|
||||
return tags[0] if tags else None
|
||||
|
||||
def select(self, tag, limit=0):
|
||||
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
|
||||
"""Select the specified tags."""
|
||||
|
||||
return list(self.iselect(tag, limit))
|
||||
|
||||
def iselect(self, tag, limit=0):
|
||||
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
|
||||
"""Iterate the specified tags."""
|
||||
|
||||
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
|
||||
yield el
|
||||
|
||||
def __repr__(self): # pragma: no cover
|
||||
def __repr__(self) -> str: # pragma: no cover
|
||||
"""Representation."""
|
||||
|
||||
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue