Bump beautifulsoup4 from 4.11.1 to 4.11.2 (#1987)

* Bump beautifulsoup4 from 4.11.1 to 4.11.2

Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.11.1 to 4.11.2.

---
updated-dependencies:
- dependency-name: beautifulsoup4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update beautifulsoup4==4.11.2

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2023-03-02 20:56:24 -08:00 committed by GitHub
commit 8e42757b2d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 449 additions and 537 deletions

View file

@ -25,13 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from __future__ import annotations
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Dict, Optional, Any, List, Iterator, Iterable
from typing import Optional, Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -44,17 +45,14 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
if isinstance(pattern, SoupSieve):
if flags:
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
@ -64,7 +62,12 @@ def compile( # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern
return cp._cached_css_compile(pattern, ns, cs, flags)
return cp._cached_css_compile(
pattern,
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
ct.CustomSelectors(custom) if custom is not None else custom,
flags
)
def purge() -> None:
@ -76,10 +79,10 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""
@ -90,10 +93,10 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> bool:
"""Match node."""
@ -104,12 +107,12 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> List['bs4.Tag']:
) -> list['bs4.Tag']:
"""Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable)
@ -118,10 +121,10 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""
@ -132,13 +135,13 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> List['bs4.Tag']:
) -> list['bs4.Tag']:
"""Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@ -147,11 +150,11 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
namespaces: Optional[dict[str, str]] = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""

View file

@ -1,4 +1,5 @@
"""Meta related things."""
from __future__ import annotations
from collections import namedtuple
import re
@ -83,7 +84,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> "Version":
) -> Version:
"""Validate version info."""
# Ensure all parts are positive integers.
@ -192,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 3, 2, "final", post=1)
__version_info__ = Version(2, 4, 0, "final")
__version__ = __version_info__._get_canonical()

View file

@ -1,11 +1,12 @@
"""CSS matcher."""
from __future__ import annotations
from datetime import datetime
from . import util
import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -64,12 +65,12 @@ class _FakeParent:
fake parent so we can traverse the root element as a child.
"""
def __init__(self, element: 'bs4.Tag') -> None:
def __init__(self, element: bs4.Tag) -> None:
"""Initialize."""
self.contents = [element]
def __len__(self) -> 'bs4.PageElement':
def __len__(self) -> bs4.PageElement:
"""Length."""
return len(self.contents)
@ -87,59 +88,59 @@ class _DocumentNav:
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
@staticmethod
def is_doc(obj: 'bs4.Tag') -> bool:
def is_doc(obj: bs4.Tag) -> bool:
"""Is `BeautifulSoup` object."""
return isinstance(obj, bs4.BeautifulSoup)
@staticmethod
def is_tag(obj: 'bs4.PageElement') -> bool:
def is_tag(obj: bs4.PageElement) -> bool:
"""Is tag."""
return isinstance(obj, bs4.Tag)
@staticmethod
def is_declaration(obj: 'bs4.PageElement') -> bool: # pragma: no cover
def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is declaration."""
return isinstance(obj, bs4.Declaration)
@staticmethod
def is_cdata(obj: 'bs4.PageElement') -> bool:
def is_cdata(obj: bs4.PageElement) -> bool:
"""Is CDATA."""
return isinstance(obj, bs4.CData)
@staticmethod
def is_processing_instruction(obj: 'bs4.PageElement') -> bool: # pragma: no cover
def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is processing instruction."""
return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod
def is_navigable_string(obj: 'bs4.PageElement') -> bool:
def is_navigable_string(obj: bs4.PageElement) -> bool:
"""Is navigable string."""
return isinstance(obj, bs4.NavigableString)
@staticmethod
def is_special_string(obj: 'bs4.PageElement') -> bool:
def is_special_string(obj: bs4.PageElement) -> bool:
"""Is special string."""
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod
def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
def is_content_string(cls, obj: bs4.PageElement) -> bool:
"""Check if node is content string."""
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
@staticmethod
def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
def create_fake_parent(el: bs4.Tag) -> _FakeParent:
"""Create fake parent for a given element."""
return _FakeParent(el)
@staticmethod
def is_xml_tree(el: 'bs4.Tag') -> bool:
def is_xml_tree(el: bs4.Tag) -> bool:
"""Check if element (or document) is from a XML tree."""
return bool(el._is_xml)
def is_iframe(self, el: 'bs4.Tag') -> bool:
def is_iframe(self, el: bs4.Tag) -> bool:
"""Check if element is an `iframe`."""
return bool(
@ -147,7 +148,7 @@ class _DocumentNav:
self.is_html_tag(el) # type: ignore[attr-defined]
)
def is_root(self, el: 'bs4.Tag') -> bool:
def is_root(self, el: bs4.Tag) -> bool:
"""
Return whether element is a root element.
@ -161,7 +162,7 @@ class _DocumentNav:
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
return root
def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el):
for content in el.contents:
@ -169,12 +170,12 @@ class _DocumentNav:
def get_children(
self,
el: 'bs4.Tag',
el: bs4.Tag,
start: Optional[int] = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
) -> Iterator[bs4.PageElement]:
"""Get children."""
if not no_iframe or not self.is_iframe(el):
@ -195,10 +196,10 @@ class _DocumentNav:
def get_descendants(
self,
el: 'bs4.Tag',
el: bs4.Tag,
tags: bool = True,
no_iframe: bool = False
) -> Iterator['bs4.PageElement']:
) -> Iterator[bs4.PageElement]:
"""Get descendants."""
if not no_iframe or not self.is_iframe(el):
@ -229,7 +230,7 @@ class _DocumentNav:
if not tags or is_tag:
yield child
def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
"""Get parent."""
parent = el.parent
@ -238,25 +239,25 @@ class _DocumentNav:
return parent
@staticmethod
def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> Optional[str]:
"""Get tag."""
return cast(Optional[str], el.name)
@staticmethod
def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
"""Get prefix."""
return cast(Optional[str], el.prefix)
@staticmethod
def get_uri(el: 'bs4.Tag') -> Optional[str]:
def get_uri(el: bs4.Tag) -> Optional[str]:
"""Get namespace `URI`."""
return cast(Optional[str], el.namespace)
@classmethod
def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get next sibling tag."""
sibling = el.next_sibling
@ -265,7 +266,7 @@ class _DocumentNav:
return sibling
@classmethod
def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get previous sibling tag."""
sibling = el.previous_sibling
@ -274,7 +275,7 @@ class _DocumentNav:
return sibling
@staticmethod
def has_html_ns(el: 'bs4.Tag') -> bool:
def has_html_ns(el: bs4.Tag) -> bool:
"""
Check if element has an HTML namespace.
@ -286,13 +287,13 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod
def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
def normalize_value(cls, value: Any) -> str | Sequence[str]:
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.
@ -327,10 +328,10 @@ class _DocumentNav:
@classmethod
def get_attribute_by_name(
cls,
el: 'bs4.Tag',
el: bs4.Tag,
name: str,
default: Optional[Union[str, Sequence[str]]] = None
) -> Optional[Union[str, Sequence[str]]]:
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
"""Get attribute by name."""
value = default
@ -347,14 +348,14 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
yield k, cls.normalize_value(v)
@classmethod
def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
"""Get classes."""
classes = cls.get_attribute_by_name(el, 'class', [])
@ -362,14 +363,14 @@ class _DocumentNav:
classes = RE_NOT_WS.findall(classes)
return cast(Sequence[str], classes)
def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
"""Get text."""
return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
)
def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
@ -423,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
"""Parse the input value."""
parsed = None # type: Optional[Tuple[float, ...]]
parsed = None # type: Optional[tuple[float, ...]]
if value is None:
return value
if itype == "date":
@ -484,7 +485,7 @@ class CSSMatch(_DocumentNav):
def __init__(
self,
selectors: ct.SelectorList,
scope: 'bs4.Tag',
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
flags: int
) -> None:
@ -492,11 +493,11 @@ class CSSMatch(_DocumentNav):
self.assert_valid_input(scope)
self.tag = scope
self.cached_meta_lang = [] # type: List[Tuple[str, str]]
self.cached_default_forms = [] # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
self.cached_indeterminate_forms = [] # type: List[Tuple['bs4.Tag', str, bool]]
self.cached_meta_lang = [] # type: list[tuple[str, str]]
self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces # type: Union[ct.Namespaces, Dict[str, str]]
self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
self.flags = flags
self.iframe_restrict = False
@ -527,7 +528,7 @@ class CSSMatch(_DocumentNav):
return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el: 'bs4.Tag') -> str:
def get_tag_ns(self, el: bs4.Tag) -> str:
"""Get tag namespace."""
if self.supports_namespaces():
@ -539,24 +540,24 @@ class CSSMatch(_DocumentNav):
namespace = NS_XHTML
return namespace
def is_html_tag(self, el: 'bs4.Tag') -> bool:
def is_html_tag(self, el: bs4.Tag) -> bool:
"""Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> Optional[str]:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@ -600,13 +601,18 @@ class CSSMatch(_DocumentNav):
ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-')
length = len(ranges)
slength = len(subtags)
rindex = 0
sindex = 0
r = ranges[rindex]
s = subtags[sindex]
# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True
# Primary tag needs to match
if r != '*' and r != s:
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False
rindex += 1
@ -645,10 +651,10 @@ class CSSMatch(_DocumentNav):
def match_attribute_name(
self,
el: 'bs4.Tag',
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[Union[str, Sequence[str]]]:
) -> Optional[str | Sequence[str]]:
"""Match attribute name and return value if it exists."""
value = None
@ -696,7 +702,7 @@ class CSSMatch(_DocumentNav):
break
return value
def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element."""
match = True
@ -717,7 +723,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes."""
match = True
@ -736,7 +742,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
@ -745,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
"""Match the tag."""
match = True
@ -757,7 +763,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match past relationship."""
found = False
@ -785,12 +791,12 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child."""
match = False
if recursive:
children = self.get_descendants # type: Callable[..., Iterator['bs4.Tag']]
children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
else:
children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict):
@ -799,7 +805,7 @@ class CSSMatch(_DocumentNav):
break
return match
def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match future relationship."""
found = False
@ -822,7 +828,7 @@ class CSSMatch(_DocumentNav):
found = self.match_selectors(sibling, relation)
return found
def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match relationship to other elements."""
found = False
@ -837,7 +843,7 @@ class CSSMatch(_DocumentNav):
return found
def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
"""Match element's ID."""
found = True
@ -847,7 +853,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
"""Match element's classes."""
current_classes = self.get_classes(el)
@ -858,7 +864,7 @@ class CSSMatch(_DocumentNav):
break
return found
def match_root(self, el: 'bs4.Tag') -> bool:
def match_root(self, el: bs4.Tag) -> bool:
"""Match element as root."""
is_root = self.is_root(el)
@ -884,20 +890,20 @@ class CSSMatch(_DocumentNav):
sibling = self.get_next(sibling, tags=False)
return is_root
def match_scope(self, el: 'bs4.Tag') -> bool:
def match_scope(self, el: bs4.Tag) -> bool:
"""Match element as scope."""
return self.scope is el
def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
"""Match tag type for `nth` matches."""
return(
return (
(self.get_tag(child) == self.get_tag(el)) and
(self.get_tag_ns(child) == self.get_tag_ns(el))
)
def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
"""Match `nth` elements."""
matched = True
@ -998,7 +1004,7 @@ class CSSMatch(_DocumentNav):
break
return matched
def match_empty(self, el: 'bs4.Tag') -> bool:
def match_empty(self, el: bs4.Tag) -> bool:
"""Check if element is empty (if requested)."""
is_empty = True
@ -1011,7 +1017,7 @@ class CSSMatch(_DocumentNav):
break
return is_empty
def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors."""
match = True
@ -1020,11 +1026,11 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text."""
match = True
content = None # type: Optional[Union[str, Sequence[str]]]
content = None # type: Optional[str | Sequence[str]]
for contain_list in contains:
if content is None:
if contain_list.own:
@ -1048,7 +1054,7 @@ class CSSMatch(_DocumentNav):
match = False
return match
def match_default(self, el: 'bs4.Tag') -> bool:
def match_default(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
@ -1087,13 +1093,13 @@ class CSSMatch(_DocumentNav):
break
return match
def match_indeterminate(self, el: 'bs4.Tag') -> bool:
def match_indeterminate(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@ -1148,7 +1154,7 @@ class CSSMatch(_DocumentNav):
return match
def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages."""
match = False
@ -1183,7 +1189,7 @@ class CSSMatch(_DocumentNav):
break
# Use cached meta language.
if not found_lang and self.cached_meta_lang:
if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang:
if root is cache[0]:
found_lang = cache[1]
@ -1217,13 +1223,13 @@ class CSSMatch(_DocumentNav):
found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang:
if found_lang is not None:
break
if not found_lang:
if found_lang is None:
self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare.
if found_lang:
if found_lang is not None:
for patterns in langs:
match = False
for pattern in patterns:
@ -1234,7 +1240,7 @@ class CSSMatch(_DocumentNav):
return match
def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
"""Check directionality."""
# If we have to match both left and right, we can't match either.
@ -1297,7 +1303,7 @@ class CSSMatch(_DocumentNav):
# Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
def match_range(self, el: bs4.Tag, condition: int) -> bool:
"""
Match range.
@ -1337,7 +1343,7 @@ class CSSMatch(_DocumentNav):
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el: 'bs4.Tag') -> bool:
def match_defined(self, el: bs4.Tag) -> bool:
"""
Match defined.
@ -1360,7 +1366,7 @@ class CSSMatch(_DocumentNav):
)
)
def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
def match_placeholder_shown(self, el: bs4.Tag) -> bool:
"""
Match placeholder shown according to HTML spec.
@ -1375,7 +1381,7 @@ class CSSMatch(_DocumentNav):
return match
def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors."""
match = False
@ -1459,7 +1465,7 @@ class CSSMatch(_DocumentNav):
return match
def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
"""Match all tags under the targeted tag."""
lim = None if limit < 1 else limit
@ -1472,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional['bs4.Tag']:
def closest(self) -> Optional[bs4.Tag]:
"""Match closest ancestor."""
current = self.tag
@ -1484,12 +1490,12 @@ class CSSMatch(_DocumentNav):
current = self.get_parent(current)
return closest
def filter(self) -> List['bs4.Tag']: # noqa A001
def filter(self) -> list[bs4.Tag]: # noqa A001
"""Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el: 'bs4.Tag') -> bool:
def match(self, el: bs4.Tag) -> bool:
"""Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
@ -1501,7 +1507,7 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
custom: Dict[str, str]
custom: dict[str, str]
flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
@ -1524,17 +1530,17 @@ class SoupSieve(ct.Immutable):
flags=flags
)
def match(self, tag: 'bs4.Tag') -> bool:
def match(self, tag: bs4.Tag) -> bool:
"""Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def closest(self, tag: bs4.Tag) -> bs4.Tag:
"""Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']: # noqa A001
def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
"""
Filter.
@ -1551,18 +1557,18 @@ class SoupSieve(ct.Immutable):
else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag: 'bs4.Tag') -> 'bs4.Tag':
def select_one(self, tag: bs4.Tag) -> bs4.Tag:
"""Select a single tag."""
tags = self.select(tag, limit=1)
return tags[0] if tags else None
def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
"""Select the specified tags."""
return list(self.iselect(tag, limit))
def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):

View file

@ -1,4 +1,5 @@
"""CSS selector parser."""
from __future__ import annotations
import re
from functools import lru_cache
from . import util
@ -6,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Dict, Match, Tuple, Type, Any, List, Union, Iterator, cast
from typing import Optional, Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -232,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> Dict[str, Union[str, ct.SelectorList]]:
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@ -325,7 +326,7 @@ class SelectorPattern:
class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern."""
def __init__(self, patterns: Tuple[Tuple[str, Tuple[str, ...], str, Type[SelectorPattern]], ...]) -> None:
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize."""
self.patterns = {}
@ -372,19 +373,19 @@ class _Selector:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.ids = kwargs.get('ids', []) # type: List[str]
self.classes = kwargs.get('classes', []) # type: List[str]
self.attributes = kwargs.get('attributes', []) # type: List[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: List[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: List[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: List[_Selector]
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.contains = kwargs.get('contains', []) # type: List[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: List[ct.SelectorLang]
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
self.no_match = kwargs.get('no_match', False) # type: bool
def _freeze_relations(self, relations: List['_Selector']) -> ct.SelectorList:
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation."""
if relations:
@ -394,7 +395,7 @@ class _Selector:
else:
return ct.SelectorList()
def freeze(self) -> Union[ct.Selector, ct.SelectorNull]:
def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self."""
if self.no_match:
@ -461,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[Dict[str, Union[str, ct.SelectorList]]] = None,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
flags: int = 0
) -> None:
"""Initialize."""
@ -583,9 +584,9 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
is_html: bool
) -> Tuple[bool, bool]:
) -> tuple[bool, bool]:
"""Parse pseudo class."""
complex_pseudo = False
@ -678,7 +679,7 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]]
iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
"""Parse `nth` pseudo."""
@ -743,7 +744,7 @@ class CSSParser:
sel: _Selector,
name: str,
has_selector: bool,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int
) -> bool:
"""Parse pseudo with opening bracket."""
@ -752,7 +753,7 @@ class CSSParser:
if name == ':not':
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
flags |= FLG_RELATIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
@ -766,21 +767,16 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
selectors: list[_Selector],
rel_type: str,
index: int
) -> Tuple[bool, _Selector, str]:
) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR
@ -814,12 +810,12 @@ class CSSParser:
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: List[_Selector],
relations: List[_Selector],
selectors: list[_Selector],
relations: list[_Selector],
is_pseudo: bool,
is_forgive: bool,
index: int
) -> Tuple[bool, _Selector]:
) -> tuple[bool, _Selector]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
@ -924,7 +920,7 @@ class CSSParser:
def parse_selectors(
self,
iselector: Iterator[Tuple[str, Match[str]]],
iselector: Iterator[tuple[str, Match[str]]],
index: int = 0,
flags: int = 0
) -> ct.SelectorList:
@ -935,7 +931,7 @@ class CSSParser:
selectors = []
has_selector = False
closed = False
relations = [] # type: List[_Selector]
relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR
# Setup various flags
@ -1069,22 +1065,12 @@ class CSSParser:
selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive:
if is_relative:
# Handle relative selectors pseudo-classes with empty slots like `:has()`
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
sel.rel_type = rel_type
sel.no_match = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
elif is_forgive and (not selectors or not relations):
# Handle normal pseudo-classes with empty slots like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
if not has_selector:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
@ -1112,7 +1098,7 @@ class CSSParser:
# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern: str) -> Iterator[Tuple[str, Match[str]]]:
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern

View file

@ -1,7 +1,8 @@
"""CSS selector structure items."""
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@ -33,7 +34,7 @@ SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable:
"""Immutable."""
__slots__: Tuple[str, ...] = ('_hash',)
__slots__: tuple[str, ...] = ('_hash',)
_hash: int
@ -48,7 +49,7 @@ class Immutable:
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls) -> "Type[Immutable]":
def __base__(cls) -> "type[Immutable]":
"""Get base class."""
return cls
@ -99,7 +100,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __init__(
self,
arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
) -> None:
"""Initialize."""
@ -107,7 +108,7 @@ class ImmutableDict(Mapping[Any, Any]):
self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None:
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):
@ -147,12 +148,12 @@ class ImmutableDict(Mapping[Any, Any]):
class Namespaces(ImmutableDict):
"""Namespaces."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):
@ -165,12 +166,12 @@ class Namespaces(ImmutableDict):
class CustomSelectors(ImmutableDict):
"""Custom selectors."""
def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
super().__init__(arg)
def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None:
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):
@ -188,30 +189,30 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional['SelectorTag']
ids: Tuple[str, ...]
classes: Tuple[str, ...]
attributes: Tuple['SelectorAttribute', ...]
nth: Tuple['SelectorNth', ...]
selectors: Tuple['SelectorList', ...]
relation: 'SelectorList'
tag: Optional[SelectorTag]
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
contains: Tuple['SelectorContains', ...]
lang: Tuple['SelectorLang', ...]
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional['SelectorTag'],
ids: Tuple[str, ...],
classes: Tuple[str, ...],
attributes: Tuple['SelectorAttribute', ...],
nth: Tuple['SelectorNth', ...],
selectors: Tuple['SelectorList', ...],
relation: 'SelectorList',
tag: Optional[SelectorTag],
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
contains: Tuple['SelectorContains', ...],
lang: Tuple['SelectorLang', ...],
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
):
"""Initialize."""
@ -286,7 +287,7 @@ class SelectorContains(Immutable):
__slots__ = ("text", "own", "_hash")
text: Tuple[str, ...]
text: tuple[str, ...]
own: bool
def __init__(self, text: Iterable[str], own: bool) -> None:
@ -305,9 +306,9 @@ class SelectorNth(Immutable):
b: int
of_type: bool
last: bool
selectors: 'SelectorList'
selectors: SelectorList
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None:
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
"""Initialize."""
super().__init__(
@ -325,7 +326,7 @@ class SelectorLang(Immutable):
__slots__ = ("languages", "_hash",)
languages: Tuple[str, ...]
languages: tuple[str, ...]
def __init__(self, languages: Iterable[str]):
"""Initialize."""
@ -353,13 +354,13 @@ class SelectorList(Immutable):
__slots__ = ("selectors", "is_not", "is_html", "_hash")
selectors: Tuple[Union['Selector', 'SelectorNull'], ...]
selectors: tuple[Selector | SelectorNull, ...]
is_not: bool
is_html: bool
def __init__(
self,
selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
is_not: bool = False,
is_html: bool = False
) -> None:
@ -371,7 +372,7 @@ class SelectorList(Immutable):
is_html=is_html
)
def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]:
def __iter__(self) -> Iterator[Selector | SelectorNull]:
"""Iterator."""
return iter(self.selectors)
@ -381,7 +382,7 @@ class SelectorList(Immutable):
return len(self.selectors)
def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']:
def __getitem__(self, index: int) -> Selector | SelectorNull:
"""Get item."""
return self.selectors[index]

View file

@ -65,6 +65,7 @@ SelectorList(
is_html=False)
```
"""
from __future__ import annotations
import re
from typing import Any

View file

@ -1,8 +1,9 @@
"""Utility."""
from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional, Tuple, List
from typing import Callable, Any, Optional
DEBUG = 0x00001
@ -75,13 +76,13 @@ def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no co
)
def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]:
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
"""Get the pattern context."""
last = 0
current_line = 1
col = 1
text = [] # type: List[str]
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]