Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
This commit is contained in:
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '1.0.0'
__version__ = '2.0.1'

View file

@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Base builder class for Rebulk
"""
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from logging import getLogger
from six import add_metaclass
from .loose import set_defaults
from .pattern import RePattern, StringPattern, FunctionalPattern
log = getLogger(__name__).log
@add_metaclass(ABCMeta)
class Builder(object):
    """
    Base builder class for patterns.

    Stores default keyword arguments (general ones, plus one set per pattern kind: regex, string,
    functional and chain) and builds pattern objects with those defaults applied. Subclasses must
    implement ``pattern`` to register the patterns that are built.
    """

    def __init__(self):
        # General defaults, applied to every kind of pattern.
        self._defaults = {}
        # Kind-specific defaults, applied before the general ones when building.
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults.

        :return:
        """
        # Call Builder's own initializer explicitly instead of ``self.__init__()``: a subclass may
        # declare an ``__init__`` with required arguments (re-running it without them raises
        # TypeError) or may initialize more than the defaults handled here.
        Builder.__init__(self)

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        The given values are merged into the stored defaults with ``override=True``.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._defaults, override=True)
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regex patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._regex_defaults, override=True)
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._string_defaults, override=True)
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._functional_defaults, override=True)
        return self

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._chain_defaults, override=True)
        return self

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern.

        :param pattern: positional arguments forwarded to ``RePattern``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``RePattern`` once defaults are applied
        :type kwargs: dict
        :return: the new pattern
        :rtype: RePattern
        """
        # Kind-specific defaults are applied before the general ones.
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return RePattern(*pattern, **kwargs)

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern.

        :param pattern: positional arguments forwarded to ``StringPattern``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``StringPattern`` once defaults are applied
        :type kwargs: dict
        :return: the new pattern
        :rtype: StringPattern
        """
        set_defaults(self._string_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return StringPattern(*pattern, **kwargs)

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern.

        :param pattern: positional arguments forwarded to ``FunctionalPattern``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``FunctionalPattern`` once defaults are applied
        :type kwargs: dict
        :return: the new pattern
        :rtype: FunctionalPattern
        """
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain.

        The new chain receives deep copies of all defaults of this builder, so later changes on
        either side do not affect the other.

        :param kwargs: keyword arguments forwarded to ``Chain`` once defaults are applied
        :type kwargs: dict
        :return: the new chain, with this builder as parent
        :rtype: Chain
        """
        # Imported here rather than at module level because the chain module imports this one.
        from .chain import Chain
        set_defaults(self._chain_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        chain = Chain(self, **kwargs)
        chain._defaults = deepcopy(self._defaults)  # pylint: disable=protected-access
        chain._regex_defaults = deepcopy(self._regex_defaults)  # pylint: disable=protected-access
        chain._functional_defaults = deepcopy(self._functional_defaults)  # pylint: disable=protected-access
        chain._string_defaults = deepcopy(self._string_defaults)  # pylint: disable=protected-access
        chain._chain_defaults = deepcopy(self._chain_defaults)  # pylint: disable=protected-access
        return chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance.

        :param pattern: pattern instances to register
        :return:
        """
        pass

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern.

        :param pattern: positional arguments forwarded to ``build_re``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``build_re``
        :type kwargs: dict
        :return: whatever ``pattern`` returns for the built pattern
        :rtype:
        """
        return self.pattern(self.build_re(*pattern, **kwargs))

    def string(self, *pattern, **kwargs):
        """
        Add string pattern.

        :param pattern: positional arguments forwarded to ``build_string``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``build_string``
        :type kwargs: dict
        :return: whatever ``pattern`` returns for the built pattern
        :rtype:
        """
        return self.pattern(self.build_string(*pattern, **kwargs))

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern.

        :param pattern: positional arguments forwarded to ``build_functional``
        :type pattern:
        :param kwargs: keyword arguments forwarded to ``build_functional``
        :type kwargs: dict
        :return: whatever ``pattern`` returns for the built pattern
        :rtype:
        """
        functional = self.build_functional(*pattern, **kwargs)
        return self.pattern(functional)

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this builder.

        :param kwargs: keyword arguments forwarded to ``build_chain``
        :type kwargs: dict
        :return: the new chain (not self), so chain parts can be declared fluently
        :rtype: Chain
        """
        chain = self.build_chain(**kwargs)
        self.pattern(chain)
        return chain

View file

@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
# pylint: disable=super-init-not-called
import itertools
from .loose import call, set_defaults
from .builder import Builder
from .loose import call
from .match import Match, Matches
from .pattern import Pattern, filter_match_kwargs
from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
pass
class Chain(Pattern):
class Chain(Pattern, Builder):
"""
Definition of a pattern chain to search for.
"""
def __init__(self, rebulk, chain_breaker=None, **kwargs):
call(super(Chain, self).__init__, **kwargs)
def __init__(self, parent, chain_breaker=None, **kwargs):
Builder.__init__(self)
call(Pattern.__init__, self, **kwargs)
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
if callable(chain_breaker):
self.chain_breaker = chain_breaker
else:
self.chain_breaker = None
self.rebulk = rebulk
self.parent = parent
self.parts = []
def defaults(self, **kwargs):
def pattern(self, *pattern):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def chain(self):
"""
Add patterns chain, using configuration from this chain
:return:
:rtype:
"""
# pylint: disable=protected-access
chain = self.rebulk.chain(**self._kwargs)
chain._defaults = dict(self._defaults)
chain._regex_defaults = dict(self._regex_defaults)
chain._functional_defaults = dict(self._functional_defaults)
chain._string_defaults = dict(self._string_defaults)
return chain
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_re(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_functional(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_string(*pattern, **kwargs)
part = ChainPart(self, pattern)
if not pattern:
raise ValueError("One pattern should be given to the chain")
if len(pattern) > 1:
raise ValueError("Only one pattern can be given to the chain")
part = ChainPart(self, pattern[0])
self.parts.append(part)
return part
def close(self):
"""
Close chain builder to continue registering other pattern
:return:
:rtype:
Deeply close the chain
:return: Rebulk instance
"""
return self.rebulk
parent = self.parent
while isinstance(parent, Chain):
parent = parent.parent
return parent
def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks
@ -173,42 +70,20 @@ class Chain(Pattern):
chain_found = False
current_chain_matches = []
valid_chain = True
is_chain_start = True
for chain_part in self.parts:
try:
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
chain_input_string,
context)
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(chain_part_matches,
lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
grouped_raw_matches_dict = dict()
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
lambda m: m.match_index):
grouped_raw_matches_dict[match_index] = list(raw_match)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
context,
with_raw_matches=True)
chain_found, chain_input_string, offset = \
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches)
except _InvalidChainException:
valid_chain = False
if current_chain_matches:
offset = current_chain_matches[0].raw_end
break
is_chain_start = False
if not chain_found:
break
if current_chain_matches and valid_chain:
@ -217,38 +92,66 @@ class Chain(Pattern):
return chain_matches
def _match_parent(self, match, yield_parent):
def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                        input_string, chain_input_string, offset, current_chain_matches):
    """
    Consume the matches of one chain part and compute the state for the next part.

    Matches accepted by the chain breaker evaluation are appended to ``current_chain_matches``
    (mutated in place), unless the chain part is hidden.

    :param chain_part: chain part that produced the matches
    :param chain_part_matches: processed matches of the chain part
    :param raw_chain_part_matches: raw matches of the chain part
    :param chain_found: current "chain found" flag
    :param input_string: full input string
    :param chain_input_string: remaining input string for the chain
    :param offset: current offset in ``input_string``
    :param current_chain_matches: matches collected so far for the chain
    :return: updated ``(chain_found, chain_input_string, offset)``
    :rtype: tuple
    """
    for part_matches in (chain_part_matches, raw_chain_part_matches):
        Chain._fix_matches_offset(part_matches, input_string, offset)

    if not raw_chain_part_matches:
        # Nothing matched for this part: state is left untouched.
        return chain_found, chain_input_string, offset

    matches_by_index = self._group_by_match_index(chain_part_matches)
    raw_matches_by_index = self._group_by_match_index(raw_chain_part_matches)

    for match_index, raw_group in raw_matches_by_index.items():
        chain_found = True
        # Continue after the end of the last raw match of this repetition.
        offset = raw_group[-1].raw_end
        chain_input_string = input_string[offset:]

        if chain_part.is_hidden:
            continue
        group = matches_by_index.get(match_index, [])
        if self._chain_breaker_eval(current_chain_matches + group):
            current_chain_matches.extend(group)

    return chain_found, chain_input_string, offset
def _process_match(self, match, match_index, child=False):
"""
Handle a parent match
Handle a match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:param match_index:
:type match_index:
:param child:
:type child:
:return:
:rtype:
"""
ret = super(Chain, self)._match_parent(match, yield_parent)
original_children = Matches(match.children)
original_end = match.end
while not ret and match.children:
last_pattern = match.children[-1].pattern
last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
last_pattern_groups = {}
for index, matches in last_pattern_groups_iter:
last_pattern_groups[index] = list(matches)
# pylint: disable=too-many-locals
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
for index in reversed(list(last_pattern_groups)):
last_matches = list(last_pattern_groups[index])
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._match_parent(match, yield_parent)
if ret:
return True
match.children = original_children
match.end = original_end
return ret
if match.children:
last_pattern = match.children[-1].pattern
last_pattern_groups = self._group_by_match_index(
[child_ for child_ in match.children if child_.pattern == last_pattern]
)
if last_pattern_groups:
original_children = Matches(match.children)
original_end = match.end
for index in reversed(list(last_pattern_groups)):
last_matches = last_pattern_groups[index]
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
match.children = original_children
match.end = original_end
return False
def _build_chain_match(self, current_chain_matches, input_string):
start = None
@ -282,46 +185,11 @@ class Chain(Pattern):
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
@staticmethod
def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
with_raw_matches=True)
chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
chain_input_string)
raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
chain_input_string)
Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
return chain_part_matches, raw_chain_part_matches
@staticmethod
def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
if not chain_part_matches:
return chain_part_matches
if not is_chain_start:
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(chain_part_matches) - 1):
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
chain_part_matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = chain_part_matches[:j]
if chain_part.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
return truncated
@staticmethod
def _validate_chain_part_matches(chain_part_matches, chain_part):
max_match_index = -1
if chain_part_matches:
max_match_index = max([m.match_index for m in chain_part_matches])
if max_match_index + 1 < chain_part.repeater_start:
raise _InvalidChainException
def _group_by_match_index(matches):
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
return grouped_matches_dict
@property
def match_options(self):
@ -338,7 +206,7 @@ class Chain(Pattern):
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
class ChainPart(object):
class ChainPart(BasePattern):
"""
Part of a pattern chain.
"""
@ -350,6 +218,51 @@ class ChainPart(object):
self.repeater_end = 1
self._hidden = False
@property
def _is_chain_start(self):
return self._chain.parts[0] == self
def matches(self, input_string, context=None, with_raw_matches=False):
    """
    Compute the matches of the wrapped pattern, restricted by the repeater of this part.

    Raw matches are always requested from the wrapped pattern, as they are needed to validate
    the repeater.

    :param input_string: the string to parse
    :param context: the context
    :param with_raw_matches: when true, raw matches are returned along with matches
    :return: matches, or a ``(matches, raw_matches)`` tuple if ``with_raw_matches`` is true
    """
    found, raw_found = self.pattern.matches(input_string, context=context, with_raw_matches=True)

    kept = self._truncate_repeater(found, input_string)
    raw_kept = self._truncate_repeater(raw_found, input_string)
    self._validate_repeater(raw_kept)

    return (kept, raw_kept) if with_raw_matches else kept
def _truncate_repeater(self, matches, input_string):
if not matches:
return matches
if not self._is_chain_start:
separator = input_string[0:matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(matches) - 1):
separator = input_string[matches[i].initiator.raw_end:
matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = matches[:j]
if self.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < self.repeater_end]
return truncated
def _validate_repeater(self, matches):
max_match_index = -1
if matches:
max_match_index = max([m.match_index for m in matches])
if max_match_index + 1 < self.repeater_start:
raise _InvalidChainException
def chain(self):
"""
Add patterns chain, using configuration from this chain

View file

@ -15,9 +15,19 @@ def formatters(*chained_formatters):
:return:
:rtype:
"""
def formatters_chain(input_string): # pylint:disable=missing-docstring
for chained_formatter in chained_formatters:
input_string = chained_formatter(input_string)
return input_string
return formatters_chain
def default_formatter(input_string):
    """
    Default formatter: returns its input unchanged.

    :param input_string: the value to format
    :return: ``input_string`` itself, unmodified
    """
    return input_string

View file

@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
from abc import ABCMeta, abstractproperty
from abc import ABCMeta, abstractmethod
from collections import defaultdict
import six
@ -16,7 +16,8 @@ class Description(object):
"""
Abstract class for a description.
"""
@abstractproperty
@property
@abstractmethod
def properties(self): # pragma: no cover
"""
Properties of described object.

View file

@ -4,12 +4,12 @@
Various utilities functions
"""
import sys
import inspect
from inspect import isclass
try:
from inspect import getfullargspec as getargspec
_fullargspec_supported = True
except ImportError:
_fullargspec_supported = False
@ -55,8 +55,8 @@ def call(function, *args, **kwargs):
:return: sale vakye as default function call
:rtype: object
"""
func = constructor_args if inspect.isclass(function) else function_args
call_args, call_kwargs = func(function, *args, **kwargs)
func = constructor_args if isclass(function) else function_args
call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
return function(*call_args, **call_kwargs)
@ -145,6 +145,8 @@ if not _fullargspec_supported:
else:
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
return call_args, call_kwarg
argspec_args = argspec_args_legacy
@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
return collection
def set_defaults(defaults, kwargs):
def set_defaults(defaults, kwargs, override=False):
"""
Set defaults from defaults dict to kwargs dict
:param override:
:type override:
:param defaults:
:type defaults:
:param kwargs:
@ -225,12 +230,13 @@ def set_defaults(defaults, kwargs):
:return:
:rtype:
"""
if 'clear' in defaults.keys() and defaults.pop('clear'):
kwargs.clear()
for key, value in defaults.items():
if key not in kwargs and value is not None:
if key in kwargs:
if isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
if key not in kwargs or override:
kwargs[key] = value
elif isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
elif key in kwargs and value is None:
kwargs[key] = None

View file

@ -815,6 +815,24 @@ class Match(object):
return filter_index(ret, predicate, index)
def tagged(self, *tags):
    """
    Check if this match has at least one of the provided tags.

    :param tags: tags to look for
    :return: True if at least one of the tags is in ``self.tags``, False otherwise
        (including when no tag is provided).
    """
    for tag in tags:
        if tag in self.tags:
            return True
    return False
def named(self, *names):
    """
    Check if at least one of the provided names is found in ``self.names``.

    :param names: names to look for
    :return: True if at least one of the names is in ``self.names``, False otherwise
        (including when no name is provided).
    """
    for name in names:
        if name in self.names:
            return True
    return False
def __len__(self):
return self.end - self.start

View file

@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six
from . import debug
from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict
from .match import Match
from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined
from .validators import allways_true
@six.add_metaclass(ABCMeta)
class Pattern(object):
class BasePattern(object):
"""
Base class for Pattern like objects
"""
@abstractmethod
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
:param input_string: the string to parse
:type input_string: str
:param context: the context
:type context: dict
:param with_raw_matches: should return details
:type with_raw_matches: dict
:return: matches based on input_string for this pattern
:rtype: iterator[Match]
"""
pass
@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
"""
Definition of a particular pattern to search for.
"""
@ -25,7 +50,7 @@ class Pattern(object):
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
properties=None, post_processor=None, **kwargs):
properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
"""
:param name: Name of this pattern
:type name: str
@ -66,15 +91,19 @@ class Pattern(object):
:type disabled: bool|function
:param log_lvl: Log level associated to this pattern
:type log_lvl: int
:param post_process: Post processing function
:param post_processor: Post processing function
:type post_processor: func
:param pre_match_processor: Pre match processing function
:type pre_match_processor: func
:param post_match_processor: Post match processing function
:type post_match_processor: func
"""
# pylint:disable=too-many-locals,unused-argument
self.name = name
self.tags = ensure_list(tags)
self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
self.values, self._default_value = ensure_dict(value, None)
self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
self.validators, self._default_validator = ensure_dict(validator, allways_true)
self.every = every
self.children = children
self.private = private
@ -96,6 +125,14 @@ class Pattern(object):
self.post_processor = None
else:
self.post_processor = post_processor
if not callable(pre_match_processor):
self.pre_match_processor = None
else:
self.pre_match_processor = pre_match_processor
if not callable(post_match_processor):
self.post_match_processor = None
else:
self.post_match_processor = post_match_processor
@property
def log_level(self):
@ -106,83 +143,6 @@ class Pattern(object):
"""
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
def _yield_children(self, match):
"""
Does this match has children
:param match:
:type match:
:return:
:rtype:
"""
return match.children and (self.children or self.every)
def _yield_parent(self):
"""
Does this mat
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
def _match_parent(self, match, yield_parent):
"""
Handle a parent match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:return:
:rtype:
"""
if not match or match.value == "":
return False
pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
self._default_value)
if pattern_value:
match.value = pattern_value
if yield_parent or self.format_all:
match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
self._default_formatter)
if yield_parent or self.validate_all:
validator = get_first_defined(self.validators, [match.name, '__parent__', None],
self._default_validator)
if validator and not validator(match):
return False
return True
def _match_child(self, child, yield_children):
"""
Handle a children match
:param child:
:type child:
:param yield_children:
:type yield_children:
:return:
:rtype:
"""
if not child or child.value == "":
return False
pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
self._default_value)
if pattern_value:
child.value = pattern_value
if yield_children or self.format_all:
child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
self._default_formatter)
if yield_children or self.validate_all:
validator = get_first_defined(self.validators, [child.name, '__children__', None],
self._default_validator)
if validator and not validator(child):
return False
return True
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
@ -200,41 +160,168 @@ class Pattern(object):
matches = []
raw_matches = []
for pattern in self.patterns:
yield_parent = self._yield_parent()
match_index = -1
match_index = 0
for match in self._match(pattern, input_string, context):
match_index += 1
match.match_index = match_index
raw_matches.append(match)
yield_children = self._yield_children(match)
if not self._match_parent(match, yield_parent):
continue
validated = True
for child in match.children:
if not self._match_child(child, yield_children):
validated = False
break
if validated:
if self.private_parent:
match.private = True
if self.private_children:
for child in match.children:
child.private = True
if yield_parent or self.private_parent:
matches.append(match)
if yield_children or self.private_children:
for child in match.children:
child.match_index = match_index
matches.append(child)
matches = self._matches_post_process(matches)
self._matches_privatize(matches)
self._matches_ignore(matches)
matches.extend(self._process_matches(match, match_index))
match_index += 1
matches = self._post_process_matches(matches)
if with_raw_matches:
return matches, raw_matches
return matches
def _matches_post_process(self, matches):
@property
def _should_include_children(self):
"""
Check if children matches from this pattern should be included in matches results.
:param match:
:type match:
:return:
:rtype:
"""
return self.children or self.every
@property
def _should_include_parent(self):
"""
Check is a match from this pattern should be included in matches results.
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
@staticmethod
def _match_config_property_keys(match, child=False):
if match.name:
yield match.name
if child:
yield '__children__'
else:
yield '__parent__'
yield None
@staticmethod
def _process_match_index(match, match_index):
    """
    Store the given match index on the match.

    :param match: match to update
    :param match_index: value assigned to ``match.match_index``
    :return:
    """
    match.match_index = match_index
def _process_match_private(self, match, child=False):
"""
Process match privacy from this pattern configuration.
:param match:
:param child:
:return:
"""
if match.name and match.name in self.private_names or \
not child and self.private_parent or \
child and self.private_children:
match.private = True
def _process_match_value(self, match, child=False):
    """
    Set the match value from this pattern configuration, if one is configured.

    A falsy configured value leaves the match untouched.

    :param match: match to update
    :param child: True if the match is a child match
    :return:
    """
    configured_value = get_first_defined(
        self.values,
        self._match_config_property_keys(match, child=child),
        self._default_value,
    )
    if not configured_value:
        return
    match.value = configured_value
def _process_match_formatter(self, match, child=False):
    """
    Set the match formatter from this pattern configuration.

    Nothing is done when the match is not included in results and ``format_all`` is not set.

    :param match: match to update
    :param child: True if the match is a child match
    :return:
    """
    if child:
        included = self._should_include_children
    else:
        included = self._should_include_parent
    if not (included or self.format_all):
        return

    match.formatter = get_first_defined(
        self.formatters,
        self._match_config_property_keys(match, child=child),
        self._default_formatter,
    )
def _process_match_validator(self, match, child=False):
    """
    Validate the match with the validator from this pattern configuration.

    Validation is skipped (the match is accepted) when the match is not included in results and
    ``validate_all`` is not set, or when no validator is found.

    :param match: match to validate
    :param child: True if the match is a child match
    :return: True if match is validated by the configured validator, False otherwise.
    """
    if child:
        included = self._should_include_children
    else:
        included = self._should_include_parent
    if not (included or self.validate_all):
        return True

    validator = get_first_defined(
        self.validators,
        self._match_config_property_keys(match, child=child),
        self._default_validator,
    )
    if not validator:
        return True
    return bool(validator(match))
def _process_match(self, match, match_index, child=False):
"""
Process match from this pattern by setting all properties from defined configuration
(index, private, value, formatter, validator, ...).
:param match:
:type match:
:return: True if match is validated by the configured validator, False otherwise.
:rtype:
"""
self._process_match_index(match, match_index)
self._process_match_private(match, child)
self._process_match_value(match, child)
self._process_match_formatter(match, child)
return self._process_match_validator(match, child)
@staticmethod
def _process_match_processor(match, processor):
if processor:
ret = processor(match)
if ret is not None:
return ret
return match
def _process_matches(self, match, match_index):
"""
Process and generate all matches for the given unprocessed match.
:param match:
:param match_index:
:return: Process and dispatched matches.
"""
match = self._process_match_processor(match, self.pre_match_processor)
if not match:
return
if not self._process_match(match, match_index):
return
for child in match.children:
if not self._process_match(child, match_index, child=True):
return
match = self._process_match_processor(match, self.post_match_processor)
if not match:
return
if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
yield match
if self._should_include_children or self.private_children:
children = [x for x in match.children if x.name not in self.ignore_names]
for child in children:
yield child
def _post_process_matches(self, matches):
"""
Post process matches with user defined function
:param matches:
@ -246,32 +333,6 @@ class Pattern(object):
return self.post_processor(matches, self)
return matches
def _matches_privatize(self, matches):
"""
Mark matches included in private_names with private flag.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.private_names:
for match in matches:
if match.name in self.private_names:
match.private = True
def _matches_ignore(self, matches):
"""
Ignore matches included in ignore_names.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.ignore_names:
for match in list(matches):
if match.name in self.ignore_names:
matches.remove(match)
@abstractproperty
def patterns(self): # pragma: no cover
"""
@ -306,7 +367,7 @@ class Pattern(object):
@abstractmethod
def _match(self, pattern, input_string, context=None): # pragma: no cover
"""
Computes all matches for a given pattern and input
Computes all unprocess matches for a given pattern and input.
:param pattern: the pattern to use
:param input_string: the string to parse
@ -350,7 +411,9 @@ class StringPattern(Pattern):
def _match(self, pattern, input_string, context=None):
for index in find_all(input_string, pattern, **self._kwargs):
yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
if match:
yield match
class RePattern(Pattern):
@ -411,15 +474,18 @@ class RePattern(Pattern):
for start, end in match_object.spans(i):
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
else:
start, end = match_object.span(i)
if start > -1 and end > -1:
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
yield main_match
if main_match:
yield main_match
class FunctionalPattern(Pattern):
@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
if self._match_kwargs:
options = self._match_kwargs.copy()
options.update(args)
yield Match(pattern=self, input_string=input_string, **options)
match = Match(pattern=self, input_string=input_string, **options)
if match:
yield match
else:
kwargs = self._match_kwargs
if isinstance(args[-1], dict):
kwargs = dict(kwargs)
kwargs.update(args[-1])
args = args[:-1]
yield Match(*args, pattern=self, input_string=input_string, **kwargs)
match = Match(*args, pattern=self, input_string=input_string, **kwargs)
if match:
yield match
def filter_match_kwargs(kwargs, children=False):

View file

@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
"""
from logging import getLogger
from .builder import Builder
from .match import Matches
from .pattern import RePattern, StringPattern, FunctionalPattern
from .chain import Chain
from .processors import ConflictSolver, PrivateRemover
from .loose import set_defaults
from .utils import extend_safe
from .rules import Rules
from .utils import extend_safe
log = getLogger(__name__).log
class Rebulk(object):
class Rebulk(Builder):
r"""
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
@ -44,6 +40,7 @@ class Rebulk(object):
>>> bulk.matches("the lakers are from la")
[<lakers:(4, 10)>, <la:(20, 22)>]
"""
# pylint:disable=protected-access
def __init__(self, disabled=lambda context: False, default_rules=True):
@ -56,6 +53,7 @@ class Rebulk(object):
:return:
:rtype:
"""
super(Rebulk, self).__init__()
if not callable(disabled):
self.disabled = lambda context: disabled
else:
@ -64,11 +62,6 @@ class Rebulk(object):
self._rules = Rules()
if default_rules:
self.rules(ConflictSolver, PrivateRemover)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
self._rebulks = []
def pattern(self, *pattern):
@ -83,172 +76,6 @@ class Rebulk(object):
self._patterns.extend(pattern)
return self
def defaults(self, **kwargs):
    """
    Store the keyword arguments used as defaults by every ``build_*`` method.

    The given mapping replaces whatever was stored by a previous call.

    :param kwargs: default keyword arguments
    :type kwargs: dict
    :return: self, so calls can be chained
    :rtype: Rebulk
    """
    self._defaults = kwargs
    return self
def regex_defaults(self, **kwargs):
    """
    Store the keyword arguments used as defaults for regular expression patterns.

    The given mapping replaces whatever was stored by a previous call.

    :param kwargs: default keyword arguments for regex patterns
    :type kwargs: dict
    :return: self, so calls can be chained
    :rtype: Rebulk
    """
    self._regex_defaults = kwargs
    return self
def regex(self, *pattern, **kwargs):
    """
    Build a regular expression pattern with ``build_re`` and register it via ``pattern``.

    :param pattern: positional arguments forwarded to ``build_re``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``build_re``
    :type kwargs: dict
    :return: self
    :rtype: Rebulk
    """
    built = self.build_re(*pattern, **kwargs)
    self.pattern(built)
    return self
def build_re(self, *pattern, **kwargs):
    """
    Create a ``RePattern`` without registering it.

    ``set_defaults`` is applied to ``kwargs`` first with the regex-specific
    defaults, then with the general defaults, before the pattern is built.

    :param pattern: positional arguments forwarded to ``RePattern``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``RePattern``
    :type kwargs: dict
    :return: the new pattern
    :rtype: RePattern
    """
    # Order matters: the specific layer is applied before the general one.
    for layer in (self._regex_defaults, self._defaults):
        set_defaults(layer, kwargs)
    return RePattern(*pattern, **kwargs)
def string_defaults(self, **kwargs):
    """
    Store the keyword arguments used as defaults for string patterns.

    The given mapping replaces whatever was stored by a previous call.

    :param kwargs: default keyword arguments for string patterns
    :type kwargs: dict
    :return: self, so calls can be chained
    :rtype: Rebulk
    """
    self._string_defaults = kwargs
    return self
def string(self, *pattern, **kwargs):
    """
    Build a string pattern with ``build_string`` and register it via ``pattern``.

    :param pattern: positional arguments forwarded to ``build_string``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``build_string``
    :type kwargs: dict
    :return: self
    :rtype: Rebulk
    """
    built = self.build_string(*pattern, **kwargs)
    self.pattern(built)
    return self
def build_string(self, *pattern, **kwargs):
    """
    Create a ``StringPattern`` without registering it.

    ``set_defaults`` is applied to ``kwargs`` first with the string-specific
    defaults, then with the general defaults, before the pattern is built.

    :param pattern: positional arguments forwarded to ``StringPattern``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``StringPattern``
    :type kwargs: dict
    :return: the new pattern
    :rtype: StringPattern
    """
    # Order matters: the specific layer is applied before the general one.
    for layer in (self._string_defaults, self._defaults):
        set_defaults(layer, kwargs)
    return StringPattern(*pattern, **kwargs)
def functional_defaults(self, **kwargs):
    """
    Store the keyword arguments used as defaults for functional patterns.

    The given mapping replaces whatever was stored by a previous call.

    :param kwargs: default keyword arguments for functional patterns
    :type kwargs: dict
    :return: self, so calls can be chained
    :rtype: Rebulk
    """
    self._functional_defaults = kwargs
    return self
def functional(self, *pattern, **kwargs):
    """
    Build a functional pattern with ``build_functional`` and register it via ``pattern``.

    :param pattern: positional arguments forwarded to ``build_functional``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``build_functional``
    :type kwargs: dict
    :return: self
    :rtype: Rebulk
    """
    built = self.build_functional(*pattern, **kwargs)
    self.pattern(built)
    return self
def build_functional(self, *pattern, **kwargs):
    """
    Create a ``FunctionalPattern`` without registering it.

    ``set_defaults`` is applied to ``kwargs`` first with the functional-specific
    defaults, then with the general defaults, before the pattern is built.

    :param pattern: positional arguments forwarded to ``FunctionalPattern``
    :type pattern: tuple
    :param kwargs: keyword arguments forwarded to ``FunctionalPattern``
    :type kwargs: dict
    :return: the new pattern
    :rtype: FunctionalPattern
    """
    # Order matters: the specific layer is applied before the general one.
    for layer in (self._functional_defaults, self._defaults):
        set_defaults(layer, kwargs)
    return FunctionalPattern(*pattern, **kwargs)
def chain_defaults(self, **kwargs):
    """
    Store the keyword arguments used as defaults for pattern chains.

    The given mapping replaces whatever was stored by a previous call.

    :param kwargs: default keyword arguments for chains
    :type kwargs: dict
    :return: self, so calls can be chained
    :rtype: Rebulk
    """
    self._chain_defaults = kwargs
    return self
def chain(self, **kwargs):
    """
    Build a pattern chain with ``build_chain`` and append it to ``self._patterns``.

    Unlike the other registration methods, this returns the new chain rather
    than ``self``.

    :param kwargs: keyword arguments forwarded to ``build_chain``
    :type kwargs: dict
    :return: the newly created chain
    :rtype: Chain
    """
    new_chain = self.build_chain(**kwargs)
    self._patterns.append(new_chain)
    return new_chain
def build_chain(self, **kwargs):
    """
    Create a ``Chain`` bound to this object without registering it.

    ``set_defaults`` is applied to ``kwargs`` first with the chain-specific
    defaults, then with the general defaults, before the chain is built.

    :param kwargs: keyword arguments forwarded to ``Chain``
    :type kwargs: dict
    :return: the new chain
    :rtype: Chain
    """
    # Order matters: the specific layer is applied before the general one.
    for layer in (self._chain_defaults, self._defaults):
        set_defaults(layer, kwargs)
    return Chain(self, **kwargs)
def rules(self, *rules):
"""
Add rules as a module, class or instance.

View file

@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re
from functools import partial
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
from ..rebulk import Rebulk
from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close():
@ -63,18 +63,61 @@ def test_build_chain():
def test_chain_defaults():
rebulk = Rebulk()
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
rebulk.chain()\
rebulk.chain() \
.regex("(?P<test>test)") \
.regex(" ").repeater("*") \
.regex("(?P<best>best)") \
.regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)")
matches = rebulk.matches("test testIgnore")
matches = rebulk.matches("test best testIgnore")
assert len(matches) == 1
assert matches[0].name == "test"
def test_chain_with_validators():
def chain_validator(match):
return match.value.startswith('t') and match.value.endswith('t')
def default_validator(match):
return match.value.startswith('t') and match.value.endswith('g')
def custom_validator(match):
return match.value.startswith('b') and match.value.endswith('t')
rebulk = Rebulk()
rebulk.defaults(children=True, validator=default_validator)
rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
.regex("(?P<test>testing)", validator=default_validator).repeater("+") \
.regex(" ").repeater("+") \
.regex("(?P<best>best)", validator=custom_validator).repeater("+")
matches = rebulk.matches("some testing best end")
assert len(matches) == 2
assert matches[0].name == "test"
assert matches[1].name == "best"
def test_matches_docs():
    """
    Chain example: episodes and an optional version are parsed and converted to ``int``.
    """
    configured = (
        Rebulk()
        .regex_defaults(flags=re.IGNORECASE)
        .defaults(children=True, formatter={'episode': int, 'version': int})
    )
    # .repeater(1) could be omitted as it's the default behavior
    rebulk = (
        configured.chain()
        .regex(r'e(?P<episode>\d{1,4})').repeater(1)
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*')
        .close()
    )

    # converts matches to dict
    result = rebulk.matches("This is E14v2-15-16-17").to_dict()

    assert 'episode' in result
    assert result['episode'] == [14, 15, 16, 17]
    assert 'version' in result
    assert result['version'] == 2
def test_matches():
rebulk = Rebulk()
@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.chain(children=True, formatter={'episode': int}) \
.defaults(formatter={'version': int}) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
alt_dash = (r'@', r'[\W_]') # abbreviation
rebulk = Rebulk()
match_names = ['season', 'episode']
other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in ['season', 'episode'] and other.name in
['screen_size', 'video_codec', 'audio_codec',
'audio_channels', 'container', 'date']
else '__default__') \
rebulk = Rebulk()
rebulk.defaults(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in match_names and other.name in other_names
else '__default__')
rebulk.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)') \
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
@ -240,11 +290,11 @@ def test_matches_4():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.defaults(validate_all=True, children=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
.defaults(formatter={'episode': int, 'version': int}) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@ -262,11 +312,11 @@ def test_matches_5():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True,
formatter={'episode': int, 'version': int}) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -288,7 +338,7 @@ def test_matches_6():
validator=None, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

View file

@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0
class TestDebug(object):
#request.addfinalizer(disable_debug)
# request.addfinalizer(disable_debug)
debug.DEBUG = True
pattern = StringPattern(1, 3, value="es")
@ -38,43 +34,43 @@ class TestDebug(object):
debug.DEBUG = False
def test_pattern(self):
    """
    The pattern must record the module, file and line where it was defined.

    Exact line numbers are deliberately not asserted: they shift whenever the
    lines above the fixture definitions in this module change.
    """
    assert self.pattern.defined_at.lineno > 0
    assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
    assert self.pattern.defined_at.filename.endswith('test_debug.py')

    assert str(self.pattern.defined_at).startswith('test_debug.py#L')
    assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
def test_match(self):
    """
    The match must record the module, file and line where it was defined.

    Exact line numbers are deliberately not asserted: they shift whenever the
    lines above the fixture definitions in this module change.
    """
    assert self.match.defined_at.lineno > 0
    assert self.match.defined_at.name == 'rebulk.test.test_debug'
    assert self.match.defined_at.filename.endswith('test_debug.py')

    assert str(self.match.defined_at).startswith('test_debug.py#L')
def test_rule(self):
    """
    The rule must record the module, file and line where it was defined.

    Exact line numbers are deliberately not asserted: they shift whenever the
    lines above the fixture definitions in this module change.
    """
    assert self.rule.defined_at.lineno > 0
    assert self.rule.defined_at.name == 'rebulk.test.test_debug'
    assert self.rule.defined_at.filename.endswith('test_debug.py')

    assert str(self.rule.defined_at).startswith('test_debug.py#L')
    assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
def test_rebulk(self):
    """
    Patterns registered on the rebulk, and their matches, must record where they were defined.

    Exact line numbers are not asserted: a one-line offset was observed on
    Travis CI, so only position-independent properties are checked.
    """
    assert self.rebulk._patterns[0].defined_at.lineno > 0
    assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
    assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')

    assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')

    assert self.rebulk._patterns[1].defined_at.lineno > 0
    assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
    assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')

    assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')

    # Each match must point back to the definition site of the pattern that produced it.
    assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
    assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

View file

@ -116,6 +116,9 @@ class TestMatchesClass(object):
assert "tag1" in matches.tags
assert "tag2" in matches.tags
assert self.match3.tagged("tag1")
assert not self.match3.tagged("start")
tag1 = matches.tagged("tag1")
assert len(tag1) == 2
assert tag1[0] == self.match2

View file

@ -62,9 +62,20 @@ def validators(*chained_validators):
:return:
:rtype:
"""
def validator_chain(match): # pylint:disable=missing-docstring
for chained_validator in chained_validators:
if not chained_validator(match):
return False
return True
return validator_chain
def allways_true(match):  # pylint:disable=unused-argument
    """
    Validator that accepts every match.

    :param match: the match to validate (ignored)
    :return: always ``True``
    :rtype: bool
    """
    return True