Update soupsieve==2.3.1

This commit is contained in:
JonnyWong16 2021-11-28 14:13:48 -08:00
parent dcfd8abddd
commit 36b55398a8
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
8 changed files with 791 additions and 375 deletions

View file

@ -2,11 +2,10 @@
from datetime import datetime
from . import util
import re
from .import css_types as ct
from . import css_types as ct
import unicodedata
from collections.abc import Sequence
import bs4
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -56,7 +55,7 @@ FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7
class _FakeParent(object):
class _FakeParent:
"""
Fake parent class.
@ -65,22 +64,22 @@ class _FakeParent(object):
fake parent so we can traverse the root element as a child.
"""
def __init__(self, element):
def __init__(self, element: 'bs4.Tag') -> None:
"""Initialize."""
self.contents = [element]
def __len__(self):
def __len__(self) -> 'bs4.PageElement':
"""Length."""
return len(self.contents)
class _DocumentNav(object):
class _DocumentNav:
"""Navigate a Beautiful Soup document."""
@classmethod
def assert_valid_input(cls, tag):
def assert_valid_input(cls, tag: Any) -> None:
"""Check if valid input tag or document."""
# Fail on unexpected types.
@ -88,64 +87,67 @@ class _DocumentNav(object):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag)))
@staticmethod
def is_doc(obj):
def is_doc(obj: 'bs4.Tag') -> bool:
"""Is `BeautifulSoup` object."""
return isinstance(obj, bs4.BeautifulSoup)
@staticmethod
def is_tag(obj):
def is_tag(obj: 'bs4.PageElement') -> bool:
"""Is tag."""
return isinstance(obj, bs4.Tag)
@staticmethod
def is_declaration(obj): # pragma: no cover
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
"""Is declaration."""
return isinstance(obj, bs4.Declaration)
@staticmethod
def is_cdata(obj):
def is_cdata(obj: 'bs4.PageElement') -> bool:
"""Is CDATA."""
return isinstance(obj, bs4.CData)
@staticmethod
def is_processing_instruction(obj): # pragma: no cover
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
"""Is processing instruction."""
return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod
def is_navigable_string(obj):
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
"""Is navigable string."""
return isinstance(obj, bs4.NavigableString)
@staticmethod
def is_special_string(obj):
def is_special_string(obj: 'bs4.PageElement') -> bool:
"""Is special string."""
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod
def is_content_string(cls, obj):
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
"""Check if node is content string."""
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
@staticmethod
def create_fake_parent(el):
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
"""Create fake parent for a given element."""
return _FakeParent(el)
@staticmethod
def is_xml_tree(el):
def is_xml_tree(el: 'bs4.Tag') -> bool:
"""Check if element (or document) is from an XML tree."""
return el._is_xml
return bool(el._is_xml)
def is_iframe(self, el):
def is_iframe(self, el: 'bs4.Tag') -> bool:
"""Check if element is an `iframe`."""
return ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and self.is_html_tag(el)
return bool(
((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
self.is_html_tag(el) # type: ignore[attr-defined]
)
def is_root(self, el):
def is_root(self, el: 'bs4.Tag') -> bool:
"""
Return whether element is a root element.
@ -153,19 +155,26 @@ class _DocumentNav(object):
and we check if it is the root element under an `iframe`.
"""
root = self.root and self.root is el
root = self.root and self.root is el # type: ignore[attr-defined]
if not root:
parent = self.get_parent(el)
root = parent is not None and self.is_html and self.is_iframe(parent)
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
return root
def get_contents(self, el, no_iframe=False):
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
"""Get contents (skipped for an `iframe` when `no_iframe` is set)."""
if not no_iframe or not self.is_iframe(el):
for content in el.contents:
yield content
def get_children(self, el, start=None, reverse=False, tags=True, no_iframe=False):
def get_children(
self,
el: 'bs4.Tag',
start: Optional[int] = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
"""Get children."""
if not no_iframe or not self.is_iframe(el):
@ -184,7 +193,12 @@ class _DocumentNav(object):
if not tags or self.is_tag(node):
yield node
def get_descendants(self, el, tags=True, no_iframe=False):
def get_descendants(
self,
el: 'bs4.Tag',
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
"""Get descendants."""
if not no_iframe or not self.is_iframe(el):
@ -215,7 +229,7 @@ class _DocumentNav(object):
if not tags or is_tag:
yield child
def get_parent(self, el, no_iframe=False):
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
"""Get parent."""
parent = el.parent
@ -224,25 +238,25 @@ class _DocumentNav(object):
return parent
@staticmethod
def get_tag_name(el):
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
"""Get tag."""
return el.name
return cast(Optional[str], el.name)
@staticmethod
def get_prefix_name(el):
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
"""Get prefix."""
return el.prefix
return cast(Optional[str], el.prefix)
@staticmethod
def get_uri(el):
def get_uri(el: 'bs4.Tag') -> Optional[str]:
"""Get namespace `URI`."""
return el.namespace
return cast(Optional[str], el.namespace)
@classmethod
def get_next(cls, el, tags=True):
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
"""Get next sibling tag."""
sibling = el.next_sibling
@ -251,7 +265,7 @@ class _DocumentNav(object):
return sibling
@classmethod
def get_previous(cls, el, tags=True):
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
"""Get previous sibling tag."""
sibling = el.previous_sibling
@ -260,7 +274,7 @@ class _DocumentNav(object):
return sibling
@staticmethod
def has_html_ns(el):
def has_html_ns(el: 'bs4.Tag') -> bool:
"""
Check if element has an HTML namespace.
@ -269,16 +283,16 @@ class _DocumentNav(object):
"""
ns = getattr(el, 'namespace') if el else None
return ns and ns == NS_XHTML
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el, attr_name):
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod
def normalize_value(cls, value):
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.
@ -297,20 +311,26 @@ class _DocumentNav(object):
if isinstance(value, Sequence):
new_value = []
for v in value:
if isinstance(v, Sequence):
# This is most certainly a user error and will crash and burn later,
# but to avoid excessive recursion, kick out now.
new_value.append(v)
if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
# This is most certainly a user error and will crash and burn later.
# To keep things working, we'll do what we do with all objects
# and convert them to strings.
new_value.append(str(v))
else:
# Convert the child to a string
new_value.append(cls.normalize_value(v))
new_value.append(cast(str, cls.normalize_value(v)))
return new_value
# Try and make anything else a string
return str(value)
@classmethod
def get_attribute_by_name(cls, el, name, default=None):
def get_attribute_by_name(
cls,
el: 'bs4.Tag',
name: str,
default: Optional[Union[str, Sequence[str]]] = None
) -> Optional[Union[str, Sequence[str]]]:
"""Get attribute by name."""
value = default
@ -327,39 +347,39 @@ class _DocumentNav(object):
return value
@classmethod
def iter_attributes(cls, el):
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
yield k, cls.normalize_value(v)
@classmethod
def get_classes(cls, el):
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
"""Get classes."""
classes = cls.get_attribute_by_name(el, 'class', [])
if isinstance(classes, str):
classes = RE_NOT_WS.findall(classes)
return classes
return cast(Sequence[str], classes)
def get_text(self, el, no_iframe=False):
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
"""Get text."""
return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
)
def get_own_text(self, el, no_iframe=False):
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
class Inputs(object):
class Inputs:
"""Class for parsing and validating input items."""
@staticmethod
def validate_day(year, month, day):
def validate_day(year: int, month: int, day: int) -> bool:
"""Validate day."""
max_days = LONG_MONTH
@ -370,7 +390,7 @@ class Inputs(object):
return 1 <= day <= max_days
@staticmethod
def validate_week(year, week):
def validate_week(year: int, week: int) -> bool:
"""Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
@ -379,34 +399,36 @@ class Inputs(object):
return 1 <= week <= max_week
@staticmethod
def validate_month(month):
def validate_month(month: int) -> bool:
"""Validate month."""
return 1 <= month <= 12
@staticmethod
def validate_year(year):
def validate_year(year: int) -> bool:
"""Validate year."""
return 1 <= year
@staticmethod
def validate_hour(hour):
def validate_hour(hour: int) -> bool:
"""Validate hour."""
return 0 <= hour <= 23
@staticmethod
def validate_minutes(minutes):
def validate_minutes(minutes: int) -> bool:
"""Validate minutes."""
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype, value):
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
"""Parse the input value."""
parsed = None
parsed = None # type: Optional[Tuple[float, ...]]
if value is None:
return value
if itype == "date":
m = RE_DATE.match(value)
if m:
@ -452,23 +474,29 @@ class Inputs(object):
elif itype in ("number", "range"):
m = RE_NUM.match(value)
if m:
parsed = float(m.group('value'))
parsed = (float(m.group('value')),)
return parsed
class _Match(object):
class CSSMatch(_DocumentNav):
"""Perform CSS matching."""
def __init__(self, selectors, scope, namespaces, flags):
def __init__(
self,
selectors: ct.SelectorList,
scope: 'bs4.Tag',
namespaces: Optional[ct.Namespaces],
flags: int
) -> None:
"""Initialize."""
self.assert_valid_input(scope)
self.tag = scope
self.cached_meta_lang = []
self.cached_default_forms = []
self.cached_indeterminate_forms = []
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
self.flags = flags
self.iframe_restrict = False
@ -494,12 +522,12 @@ class _Match(object):
self.is_xml = self.is_xml_tree(doc)
self.is_html = not self.is_xml or self.has_html_namespace
def supports_namespaces(self):
def supports_namespaces(self) -> bool:
"""Check if namespaces are supported in the HTML type."""
return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el):
def get_tag_ns(self, el: 'bs4.Tag') -> str:
"""Get tag namespace."""
if self.supports_namespaces():
@ -511,24 +539,24 @@ class _Match(object):
namespace = NS_XHTML
return namespace
def is_html_tag(self, el):
def is_html_tag(self, el: 'bs4.Tag') -> bool:
"""Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el):
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el):
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el):
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@ -564,7 +592,7 @@ class _Match(object):
return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
return None
def extended_language_filter(self, lang_range, lang_tag):
def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
"""Filter the language tags."""
match = True
@ -615,7 +643,12 @@ class _Match(object):
return match
def match_attribute_name(self, el, attr, prefix):
def match_attribute_name(
self,
el: 'bs4.Tag',
attr: str,
prefix: Optional[str]
) -> Optional[Union[str, Sequence[str]]]:
"""Match attribute name and return value if it exists."""
value = None
@ -663,13 +696,13 @@ class _Match(object):
break
return value
def match_namespace(self, el, tag):
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element."""
match = True
namespace = self.get_tag_ns(el)
default_namespace = self.namespaces.get('')
tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix, None)
tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
# We must match the default namespace if one is not provided
if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
match = False
@ -684,27 +717,26 @@ class _Match(object):
match = False
return match
def match_attributes(self, el, attributes):
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes."""
match = True
if attributes:
for a in attributes:
value = self.match_attribute_name(el, a.attribute, a.prefix)
temp = self.match_attribute_name(el, a.attribute, a.prefix)
pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
if isinstance(value, list):
value = ' '.join(value)
if value is None:
if temp is None:
match = False
break
elif pattern is None:
value = temp if isinstance(temp, str) else ' '.join(temp)
if pattern is None:
continue
elif pattern.match(value) is None:
match = False
break
return match
def match_tagname(self, el, tag):
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
"""Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
@ -713,7 +745,7 @@ class _Match(object):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el, tag):
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
"""Match the tag."""
match = True
@ -725,10 +757,14 @@ class _Match(object):
match = False
return match
def match_past_relations(self, el, relation):
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
"""Match past relationship."""
found = False
# I don't think this can ever happen, but it makes `mypy` happy
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
return found
if relation[0].rel_type == REL_PARENT:
parent = self.get_parent(el, no_iframe=self.iframe_restrict)
while not found and parent:
@ -749,21 +785,28 @@ class _Match(object):
found = self.match_selectors(sibling, relation)
return found
def match_future_child(self, parent, relation, recursive=False):
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child."""
match = False
children = self.get_descendants if recursive else self.get_children
if recursive:
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
else:
children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict):
match = self.match_selectors(child, relation)
if match:
break
return match
def match_future_relations(self, el, relation):
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
"""Match future relationship."""
found = False
# I don't think this can ever happen, but it makes `mypy` happy
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
return found
if relation[0].rel_type == REL_HAS_PARENT:
found = self.match_future_child(el, relation, True)
elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
@ -779,11 +822,14 @@ class _Match(object):
found = self.match_selectors(sibling, relation)
return found
def match_relations(self, el, relation):
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
"""Match relationship to other elements."""
found = False
if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
return found
if relation[0].rel_type.startswith(':'):
found = self.match_future_relations(el, relation)
else:
@ -791,7 +837,7 @@ class _Match(object):
return found
def match_id(self, el, ids):
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
"""Match element's ID."""
found = True
@ -801,7 +847,7 @@ class _Match(object):
break
return found
def match_classes(self, el, classes):
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
"""Match element's classes."""
current_classes = self.get_classes(el)
@ -812,7 +858,7 @@ class _Match(object):
break
return found
def match_root(self, el):
def match_root(self, el: 'bs4.Tag') -> bool:
"""Match element as root."""
is_root = self.is_root(el)
@ -838,12 +884,12 @@ class _Match(object):
sibling = self.get_next(sibling, tags=False)
return is_root
def match_scope(self, el):
def match_scope(self, el: 'bs4.Tag') -> bool:
"""Match element as scope."""
return self.scope is el
def match_nth_tag_type(self, el, child):
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
"""Match tag type for `nth` matches."""
return(
@ -851,7 +897,7 @@ class _Match(object):
(self.get_tag_ns(child) == self.get_tag_ns(el))
)
def match_nth(self, el, nth):
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
"""Match `nth` elements."""
matched = True
@ -952,7 +998,7 @@ class _Match(object):
break
return matched
def match_empty(self, el):
def match_empty(self, el: 'bs4.Tag') -> bool:
"""Check if element is empty (if requested)."""
is_empty = True
@ -965,7 +1011,7 @@ class _Match(object):
break
return is_empty
def match_subselectors(self, el, selectors):
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors."""
match = True
@ -974,11 +1020,11 @@ class _Match(object):
match = False
return match
def match_contains(self, el, contains):
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text."""
match = True
content = None
content = None # type: Optional[Union[str, Sequence[str]]]
for contain_list in contains:
if content is None:
if contain_list.own:
@ -1002,7 +1048,7 @@ class _Match(object):
match = False
return match
def match_default(self, el):
def match_default(self, el: 'bs4.Tag') -> bool:
"""Match default."""
match = False
@ -1035,19 +1081,19 @@ class _Match(object):
if name in ('input', 'button'):
v = self.get_attribute_by_name(child, 'type', '')
if v and util.lower(v) == 'submit':
self.cached_default_forms.append([form, child])
self.cached_default_forms.append((form, child))
if el is child:
match = True
break
return match
def match_indeterminate(self, el):
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
"""Match indeterminate."""
match = False
name = self.get_attribute_by_name(el, 'name')
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el):
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@ -1098,11 +1144,11 @@ class _Match(object):
break
if not checked:
match = True
self.cached_indeterminate_forms.append([form, name, match])
self.cached_indeterminate_forms.append((form, name, match))
return match
def match_lang(self, el, langs):
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages."""
match = False
@ -1169,26 +1215,26 @@ class _Match(object):
content = v
if c_lang and content:
found_lang = content
self.cached_meta_lang.append((root, found_lang))
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang:
break
if not found_lang:
self.cached_meta_lang.append((root, False))
self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare.
if found_lang:
for patterns in langs:
match = False
for pattern in patterns:
if self.extended_language_filter(pattern, found_lang):
if self.extended_language_filter(pattern, cast(str, found_lang)):
match = True
if not match:
break
return match
def match_dir(self, el, directionality):
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
"""Check directionality."""
# If we have to match both left and right, we can't match either.
@ -1220,13 +1266,13 @@ class _Match(object):
# Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea:
value = []
temp = []
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
value.append(node)
value = ''.join(value)
temp.append(node)
value = ''.join(temp)
else:
value = self.get_attribute_by_name(el, 'value', '')
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value:
for c in value:
bidi = unicodedata.bidirectional(c)
@ -1251,7 +1297,7 @@ class _Match(object):
# Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el, condition):
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
"""
Match range.
@ -1264,20 +1310,14 @@ class _Match(object):
out_of_range = False
itype = util.lower(self.get_attribute_by_name(el, 'type'))
mn = self.get_attribute_by_name(el, 'min', None)
if mn is not None:
mn = Inputs.parse_value(itype, mn)
mx = self.get_attribute_by_name(el, 'max', None)
if mx is not None:
mx = Inputs.parse_value(itype, mx)
mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
# There is no valid min or max, so we cannot evaluate a range
if mn is None and mx is None:
return False
value = self.get_attribute_by_name(el, 'value', None)
if value is not None:
value = Inputs.parse_value(itype, value)
value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
if value is not None:
if itype in ("date", "datetime-local", "month", "week", "number", "range"):
if mn is not None and value < mn:
@ -1297,7 +1337,7 @@ class _Match(object):
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el):
def match_defined(self, el: 'bs4.Tag') -> bool:
"""
Match defined.
@ -1313,12 +1353,14 @@ class _Match(object):
name = self.get_tag(el)
return (
name.find('-') == -1 or
name.find(':') != -1 or
self.get_prefix(el) is not None
name is not None and (
name.find('-') == -1 or
name.find(':') != -1 or
self.get_prefix(el) is not None
)
)
def match_placeholder_shown(self, el):
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
"""
Match placeholder shown according to HTML spec.
@ -1333,7 +1375,7 @@ class _Match(object):
return match
def match_selectors(self, el, selectors):
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors."""
match = False
@ -1405,7 +1447,7 @@ class _Match(object):
if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
continue
# Validate that the tag contains the specified text.
if not self.match_contains(el, selector.contains):
if selector.contains and not self.match_contains(el, selector.contains):
continue
match = not is_not
break
@ -1417,21 +1459,20 @@ class _Match(object):
return match
def select(self, limit=0):
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
"""Match all tags under the targeted tag."""
if limit < 1:
limit = None
lim = None if limit < 1 else limit
for child in self.get_descendants(self.tag):
if self.match(child):
yield child
if limit is not None:
limit -= 1
if limit < 1:
if lim is not None:
lim -= 1
if lim < 1:
break
def closest(self):
def closest(self) -> Optional['bs4.Tag']:
"""Match closest ancestor."""
current = self.tag
@ -1443,30 +1484,39 @@ class _Match(object):
current = self.get_parent(current)
return closest
def filter(self): # noqa A001
def filter(self) -> List['bs4.Tag']: # noqa A001
"""Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el):
def match(self, el: 'bs4.Tag') -> bool:
"""Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
class CSSMatch(_DocumentNav, _Match):
"""The Beautiful Soup CSS match class."""
class SoupSieve(ct.Immutable):
"""Compiled Soup Sieve selector matching object."""
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
custom: Dict[str, str]
flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
def __init__(self, pattern, selectors, namespaces, custom, flags):
def __init__(
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
flags: int
):
"""Initialize."""
super(SoupSieve, self).__init__(
super().__init__(
pattern=pattern,
selectors=selectors,
namespaces=namespaces,
@ -1474,17 +1524,17 @@ class SoupSieve(ct.Immutable):
flags=flags
)
def match(self, tag):
def match(self, tag: 'bs4.Tag') -> bool:
"""Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag):
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
"""Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable): # noqa A001
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
"""
Filter.
@ -1501,24 +1551,24 @@ class SoupSieve(ct.Immutable):
else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag):
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
"""Select a single tag."""
tags = self.select(tag, limit=1)
return tags[0] if tags else None
def select(self, tag, limit=0):
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
"""Select the specified tags."""
return list(self.iselect(tag, limit))
def iselect(self, tag, limit=0):
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
yield el
def __repr__(self): # pragma: no cover
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(