mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 18:47:09 -07:00
Update vendored guessit to 3.1.1
Updates python-dateutil to 2.8.2 Updates rebulk to 2.0.1
This commit is contained in:
parent
ebc9718117
commit
2226a74ef8
66 changed files with 2995 additions and 1306 deletions
|
@ -4,4 +4,4 @@
|
|||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '1.0.0'
|
||||
__version__ = '2.0.1'
|
||||
|
|
217
libs/common/rebulk/builder.py
Normal file
217
libs/common/rebulk/builder.py
Normal file
|
@ -0,0 +1,217 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Base builder class for Rebulk
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from copy import deepcopy
|
||||
from logging import getLogger
|
||||
|
||||
from six import add_metaclass
|
||||
|
||||
from .loose import set_defaults
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
@add_metaclass(ABCMeta)
class Builder(object):
    """
    Base builder class for patterns.

    Keeps dictionaries of default keyword arguments (general, regex, string, functional and chain)
    and provides factory methods that apply those defaults when building patterns.
    Subclasses must implement :meth:`pattern` to register the patterns that are built.
    """

    def __init__(self):
        # One dictionary of default keyword arguments per kind of object this builder creates.
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults.

        Implemented by calling ``__init__`` again without arguments.
        NOTE(review): a subclass whose ``__init__`` requires arguments, or initialises more state
        than the defaults, is affected by this as well - confirm before relying on it there.

        :return: None
        """
        self.__init__()

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        The given values are merged into the stored defaults using ``set_defaults(..., override=True)``.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._defaults, override=True)
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regex patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._regex_defaults, override=True)
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._string_defaults, override=True)
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._functional_defaults, override=True)
        return self

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: default keyword arguments
        :type kwargs: dict
        :return: self
        :rtype: Builder
        """
        set_defaults(kwargs, self._chain_defaults, override=True)
        return self

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern

        :param pattern: positional arguments forwarded to RePattern
        :type pattern:
        :param kwargs: keyword arguments forwarded to RePattern, completed with the defaults
        :type kwargs: dict
        :return: the new pattern
        :rtype: RePattern
        """
        # Regex specific defaults are applied before the general ones.
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return RePattern(*pattern, **kwargs)

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern

        :param pattern: positional arguments forwarded to StringPattern
        :type pattern:
        :param kwargs: keyword arguments forwarded to StringPattern, completed with the defaults
        :type kwargs: dict
        :return: the new pattern
        :rtype: StringPattern
        """
        # String specific defaults are applied before the general ones.
        set_defaults(self._string_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return StringPattern(*pattern, **kwargs)

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern

        :param pattern: positional arguments forwarded to FunctionalPattern
        :type pattern:
        :param kwargs: keyword arguments forwarded to FunctionalPattern, completed with the defaults
        :type kwargs: dict
        :return: the new pattern
        :rtype: FunctionalPattern
        """
        # Functional specific defaults are applied before the general ones.
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain

        The chain is created with this builder as first argument and receives deep copies of
        all default dictionaries of this builder.

        :param kwargs: keyword arguments forwarded to Chain, completed with the defaults
        :type kwargs: dict
        :return: the new chain
        :rtype: Chain
        """
        # Imported locally: the chain module itself imports Builder from this module.
        from .chain import Chain
        set_defaults(self._chain_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        chain = Chain(self, **kwargs)
        # Deep copies, so the chain gets its own default dictionaries instead of sharing these.
        chain._defaults = deepcopy(self._defaults)  # pylint: disable=protected-access
        chain._regex_defaults = deepcopy(self._regex_defaults)  # pylint: disable=protected-access
        chain._functional_defaults = deepcopy(self._functional_defaults)  # pylint: disable=protected-access
        chain._string_defaults = deepcopy(self._string_defaults)  # pylint: disable=protected-access
        chain._chain_defaults = deepcopy(self._chain_defaults)  # pylint: disable=protected-access
        return chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance

        :param pattern: patterns to register
        :return:
        """

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern

        :param pattern: positional arguments forwarded to :meth:`build_re`
        :type pattern:
        :param kwargs: keyword arguments forwarded to :meth:`build_re`
        :type kwargs: dict
        :return: the value returned by :meth:`pattern`
        """
        return self.pattern(self.build_re(*pattern, **kwargs))

    def string(self, *pattern, **kwargs):
        """
        Add string pattern

        :param pattern: positional arguments forwarded to :meth:`build_string`
        :type pattern:
        :param kwargs: keyword arguments forwarded to :meth:`build_string`
        :type kwargs: dict
        :return: the value returned by :meth:`pattern`
        """
        return self.pattern(self.build_string(*pattern, **kwargs))

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern

        :param pattern: positional arguments forwarded to :meth:`build_functional`
        :type pattern:
        :param kwargs: keyword arguments forwarded to :meth:`build_functional`
        :type kwargs: dict
        :return: the value returned by :meth:`pattern`
        """
        functional = self.build_functional(*pattern, **kwargs)
        return self.pattern(functional)

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this builder

        :param kwargs: keyword arguments forwarded to :meth:`build_chain`
        :type kwargs: dict
        :return: the new chain, after it has been registered with :meth:`pattern`
        :rtype: Chain
        """
        chain = self.build_chain(**kwargs)
        self.pattern(chain)
        return chain
|
|
@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
|
|||
# pylint: disable=super-init-not-called
|
||||
import itertools
|
||||
|
||||
from .loose import call, set_defaults
|
||||
from .builder import Builder
|
||||
from .loose import call
|
||||
from .match import Match, Matches
|
||||
from .pattern import Pattern, filter_match_kwargs
|
||||
from .pattern import Pattern, filter_match_kwargs, BasePattern
|
||||
from .remodule import re
|
||||
|
||||
|
||||
|
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class Chain(Pattern):
|
||||
class Chain(Pattern, Builder):
|
||||
"""
|
||||
Definition of a pattern chain to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, rebulk, chain_breaker=None, **kwargs):
|
||||
call(super(Chain, self).__init__, **kwargs)
|
||||
def __init__(self, parent, chain_breaker=None, **kwargs):
|
||||
Builder.__init__(self)
|
||||
call(Pattern.__init__, self, **kwargs)
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
if callable(chain_breaker):
|
||||
self.chain_breaker = chain_breaker
|
||||
else:
|
||||
self.chain_breaker = None
|
||||
self.rebulk = rebulk
|
||||
self.parent = parent
|
||||
self.parts = []
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._regex_defaults = kwargs
|
||||
return self
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._string_defaults = kwargs
|
||||
return self
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._functional_defaults = kwargs
|
||||
return self
|
||||
|
||||
def chain(self):
|
||||
"""
|
||||
Add patterns chain, using configuration from this chain
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
# pylint: disable=protected-access
|
||||
chain = self.rebulk.chain(**self._kwargs)
|
||||
chain._defaults = dict(self._defaults)
|
||||
chain._regex_defaults = dict(self._regex_defaults)
|
||||
chain._functional_defaults = dict(self._functional_defaults)
|
||||
chain._string_defaults = dict(self._string_defaults)
|
||||
return chain
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_re(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_functional(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_string(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
if not pattern:
|
||||
raise ValueError("One pattern should be given to the chain")
|
||||
if len(pattern) > 1:
|
||||
raise ValueError("Only one pattern can be given to the chain")
|
||||
part = ChainPart(self, pattern[0])
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close chain builder to continue registering other pattern
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
Deeply close the chain
|
||||
:return: Rebulk instance
|
||||
"""
|
||||
return self.rebulk
|
||||
parent = self.parent
|
||||
while isinstance(parent, Chain):
|
||||
parent = parent.parent
|
||||
return parent
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
# pylint: disable=too-many-locals,too-many-nested-blocks
|
||||
|
@ -173,42 +70,20 @@ class Chain(Pattern):
|
|||
chain_found = False
|
||||
current_chain_matches = []
|
||||
valid_chain = True
|
||||
is_chain_start = True
|
||||
for chain_part in self.parts:
|
||||
try:
|
||||
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
|
||||
chain_input_string,
|
||||
context)
|
||||
|
||||
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
|
||||
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
|
||||
|
||||
if raw_chain_part_matches:
|
||||
grouped_matches_dict = dict()
|
||||
for match_index, match in itertools.groupby(chain_part_matches,
|
||||
lambda m: m.match_index):
|
||||
grouped_matches_dict[match_index] = list(match)
|
||||
|
||||
grouped_raw_matches_dict = dict()
|
||||
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
|
||||
lambda m: m.match_index):
|
||||
grouped_raw_matches_dict[match_index] = list(raw_match)
|
||||
|
||||
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
|
||||
chain_found = True
|
||||
offset = grouped_raw_matches[-1].raw_end
|
||||
chain_input_string = input_string[offset:]
|
||||
if not chain_part.is_hidden:
|
||||
grouped_matches = grouped_matches_dict.get(match_index, [])
|
||||
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
|
||||
current_chain_matches.extend(grouped_matches)
|
||||
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
|
||||
context,
|
||||
with_raw_matches=True)
|
||||
|
||||
chain_found, chain_input_string, offset = \
|
||||
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
|
||||
input_string, chain_input_string, offset, current_chain_matches)
|
||||
except _InvalidChainException:
|
||||
valid_chain = False
|
||||
if current_chain_matches:
|
||||
offset = current_chain_matches[0].raw_end
|
||||
break
|
||||
is_chain_start = False
|
||||
if not chain_found:
|
||||
break
|
||||
if current_chain_matches and valid_chain:
|
||||
|
@ -217,38 +92,66 @@ class Chain(Pattern):
|
|||
|
||||
return chain_matches
|
||||
|
||||
def _match_parent(self, match, yield_parent):
|
||||
def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                        input_string, chain_input_string, offset, current_chain_matches):
    """
    Consume the matches of one chain part and compute the state for the next part.

    Both match lists are first passed to ``Chain._fix_matches_offset`` with the current offset.
    For each group of raw matches sharing a match index, the offset is moved to the ``raw_end``
    of the last raw match of the group. Unless the chain part is hidden, the matching group of
    processed matches is appended to ``current_chain_matches`` (modified in place) when
    ``_chain_breaker_eval`` accepts the extended list.

    :return: updated ``(chain_found, chain_input_string, offset)``
    :rtype: tuple
    """
    for part_matches in (chain_part_matches, raw_chain_part_matches):
        Chain._fix_matches_offset(part_matches, input_string, offset)

    # Without raw matches, the state is returned untouched.
    if not raw_chain_part_matches:
        return chain_found, chain_input_string, offset

    matches_by_index = self._group_by_match_index(chain_part_matches)
    raw_by_index = self._group_by_match_index(raw_chain_part_matches)

    for index, raw_group in raw_by_index.items():
        chain_found = True
        offset = raw_group[-1].raw_end
        chain_input_string = input_string[offset:]

        if chain_part.is_hidden:
            continue

        group = matches_by_index.get(index, [])
        if self._chain_breaker_eval(current_chain_matches + group):
            current_chain_matches.extend(group)

    return chain_found, chain_input_string, offset
|
||||
|
||||
def _process_match(self, match, match_index, child=False):
|
||||
"""
|
||||
Handle a parent match
|
||||
Handle a match
|
||||
:param match:
|
||||
:type match:
|
||||
:param yield_parent:
|
||||
:type yield_parent:
|
||||
:param match_index:
|
||||
:type match_index:
|
||||
:param child:
|
||||
:type child:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
ret = super(Chain, self)._match_parent(match, yield_parent)
|
||||
original_children = Matches(match.children)
|
||||
original_end = match.end
|
||||
while not ret and match.children:
|
||||
last_pattern = match.children[-1].pattern
|
||||
last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
|
||||
last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
|
||||
last_pattern_groups = {}
|
||||
for index, matches in last_pattern_groups_iter:
|
||||
last_pattern_groups[index] = list(matches)
|
||||
# pylint: disable=too-many-locals
|
||||
ret = super(Chain, self)._process_match(match, match_index, child=child)
|
||||
if ret:
|
||||
return True
|
||||
|
||||
for index in reversed(list(last_pattern_groups)):
|
||||
last_matches = list(last_pattern_groups[index])
|
||||
for last_match in last_matches:
|
||||
match.children.remove(last_match)
|
||||
match.end = match.children[-1].end if match.children else match.start
|
||||
ret = super(Chain, self)._match_parent(match, yield_parent)
|
||||
if ret:
|
||||
return True
|
||||
match.children = original_children
|
||||
match.end = original_end
|
||||
return ret
|
||||
if match.children:
|
||||
last_pattern = match.children[-1].pattern
|
||||
last_pattern_groups = self._group_by_match_index(
|
||||
[child_ for child_ in match.children if child_.pattern == last_pattern]
|
||||
)
|
||||
|
||||
if last_pattern_groups:
|
||||
original_children = Matches(match.children)
|
||||
original_end = match.end
|
||||
|
||||
for index in reversed(list(last_pattern_groups)):
|
||||
last_matches = last_pattern_groups[index]
|
||||
for last_match in last_matches:
|
||||
match.children.remove(last_match)
|
||||
match.end = match.children[-1].end if match.children else match.start
|
||||
ret = super(Chain, self)._process_match(match, match_index, child=child)
|
||||
if ret:
|
||||
return True
|
||||
|
||||
match.children = original_children
|
||||
match.end = original_end
|
||||
|
||||
return False
|
||||
|
||||
def _build_chain_match(self, current_chain_matches, input_string):
|
||||
start = None
|
||||
|
@ -282,46 +185,11 @@ class Chain(Pattern):
|
|||
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
|
||||
|
||||
@staticmethod
|
||||
def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
|
||||
chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
|
||||
with_raw_matches=True)
|
||||
chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
|
||||
chain_input_string)
|
||||
raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
|
||||
chain_input_string)
|
||||
|
||||
Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
|
||||
return chain_part_matches, raw_chain_part_matches
|
||||
|
||||
@staticmethod
|
||||
def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
|
||||
if not chain_part_matches:
|
||||
return chain_part_matches
|
||||
|
||||
if not is_chain_start:
|
||||
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
|
||||
if separator:
|
||||
return []
|
||||
|
||||
j = 1
|
||||
for i in range(0, len(chain_part_matches) - 1):
|
||||
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
|
||||
chain_part_matches[i + 1].initiator.raw_start]
|
||||
if separator:
|
||||
break
|
||||
j += 1
|
||||
truncated = chain_part_matches[:j]
|
||||
if chain_part.repeater_end is not None:
|
||||
truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
|
||||
return truncated
|
||||
|
||||
@staticmethod
|
||||
def _validate_chain_part_matches(chain_part_matches, chain_part):
|
||||
max_match_index = -1
|
||||
if chain_part_matches:
|
||||
max_match_index = max([m.match_index for m in chain_part_matches])
|
||||
if max_match_index + 1 < chain_part.repeater_start:
|
||||
raise _InvalidChainException
|
||||
def _group_by_match_index(matches):
|
||||
grouped_matches_dict = dict()
|
||||
for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
|
||||
grouped_matches_dict[match_index] = list(match)
|
||||
return grouped_matches_dict
|
||||
|
||||
@property
|
||||
def match_options(self):
|
||||
|
@ -338,7 +206,7 @@ class Chain(Pattern):
|
|||
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
|
||||
|
||||
|
||||
class ChainPart(object):
|
||||
class ChainPart(BasePattern):
|
||||
"""
|
||||
Part of a pattern chain.
|
||||
"""
|
||||
|
@ -350,6 +218,51 @@ class ChainPart(object):
|
|||
self.repeater_end = 1
|
||||
self._hidden = False
|
||||
|
||||
@property
|
||||
def _is_chain_start(self):
|
||||
return self._chain.parts[0] == self
|
||||
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
    """
    Compute the matches of the wrapped pattern, restricted by the repeater of this part.

    Matches and raw matches are both truncated with ``_truncate_repeater``; the truncated raw
    matches are then checked with ``_validate_repeater``.

    :param input_string: the string to parse
    :param context: the context, forwarded to the wrapped pattern
    :param with_raw_matches: when true, the raw matches are returned as well
    :return: the matches, or a ``(matches, raw_matches)`` tuple if with_raw_matches is true
    """
    found, raw_found = self.pattern.matches(input_string, context=context, with_raw_matches=True)

    found = self._truncate_repeater(found, input_string)
    raw_found = self._truncate_repeater(raw_found, input_string)

    self._validate_repeater(raw_found)

    return (found, raw_found) if with_raw_matches else found
|
||||
|
||||
def _truncate_repeater(self, matches, input_string):
|
||||
if not matches:
|
||||
return matches
|
||||
|
||||
if not self._is_chain_start:
|
||||
separator = input_string[0:matches[0].initiator.raw_start]
|
||||
if separator:
|
||||
return []
|
||||
|
||||
j = 1
|
||||
for i in range(0, len(matches) - 1):
|
||||
separator = input_string[matches[i].initiator.raw_end:
|
||||
matches[i + 1].initiator.raw_start]
|
||||
if separator:
|
||||
break
|
||||
j += 1
|
||||
truncated = matches[:j]
|
||||
if self.repeater_end is not None:
|
||||
truncated = [m for m in truncated if m.match_index < self.repeater_end]
|
||||
return truncated
|
||||
|
||||
def _validate_repeater(self, matches):
|
||||
max_match_index = -1
|
||||
if matches:
|
||||
max_match_index = max([m.match_index for m in matches])
|
||||
if max_match_index + 1 < self.repeater_start:
|
||||
raise _InvalidChainException
|
||||
|
||||
def chain(self):
|
||||
"""
|
||||
Add patterns chain, using configuration from this chain
|
||||
|
|
|
@ -15,9 +15,19 @@ def formatters(*chained_formatters):
|
|||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
|
||||
def formatters_chain(input_string): # pylint:disable=missing-docstring
|
||||
for chained_formatter in chained_formatters:
|
||||
input_string = chained_formatter(input_string)
|
||||
return input_string
|
||||
|
||||
return formatters_chain
|
||||
|
||||
|
||||
def default_formatter(input_string):
    """
    Default formatter, leaving its input untouched.

    :param input_string: value to format
    :return: the given input_string, unchanged
    """
    return input_string
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"""
|
||||
Introspect rebulk object to retrieve capabilities.
|
||||
"""
|
||||
from abc import ABCMeta, abstractproperty
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import defaultdict
|
||||
|
||||
import six
|
||||
|
@ -16,7 +16,8 @@ class Description(object):
|
|||
"""
|
||||
Abstract class for a description.
|
||||
"""
|
||||
@abstractproperty
|
||||
@property
|
||||
@abstractmethod
|
||||
def properties(self): # pragma: no cover
|
||||
"""
|
||||
Properties of described object.
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
Various utilities functions
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
|
||||
from inspect import isclass
|
||||
try:
|
||||
from inspect import getfullargspec as getargspec
|
||||
|
||||
_fullargspec_supported = True
|
||||
except ImportError:
|
||||
_fullargspec_supported = False
|
||||
|
@ -55,8 +55,8 @@ def call(function, *args, **kwargs):
|
|||
:return: same value as default function call
|
||||
:rtype: object
|
||||
"""
|
||||
func = constructor_args if inspect.isclass(function) else function_args
|
||||
call_args, call_kwargs = func(function, *args, **kwargs)
|
||||
func = constructor_args if isclass(function) else function_args
|
||||
call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
|
||||
return function(*call_args, **call_kwargs)
|
||||
|
||||
|
||||
|
@ -145,6 +145,8 @@ if not _fullargspec_supported:
|
|||
else:
|
||||
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
|
||||
return call_args, call_kwarg
|
||||
|
||||
|
||||
argspec_args = argspec_args_legacy
|
||||
|
||||
|
||||
|
@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
|
|||
return collection
|
||||
|
||||
|
||||
def set_defaults(defaults, kwargs):
|
||||
def set_defaults(defaults, kwargs, override=False):
|
||||
"""
|
||||
Set defaults from defaults dict to kwargs dict
|
||||
|
||||
:param override:
|
||||
:type override:
|
||||
:param defaults:
|
||||
:type defaults:
|
||||
:param kwargs:
|
||||
|
@ -225,12 +230,13 @@ def set_defaults(defaults, kwargs):
|
|||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if 'clear' in defaults.keys() and defaults.pop('clear'):
|
||||
kwargs.clear()
|
||||
for key, value in defaults.items():
|
||||
if key not in kwargs and value is not None:
|
||||
if key in kwargs:
|
||||
if isinstance(value, list) and isinstance(kwargs[key], list):
|
||||
kwargs[key] = list(value) + kwargs[key]
|
||||
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
|
||||
set_defaults(value, kwargs[key])
|
||||
if key not in kwargs or override:
|
||||
kwargs[key] = value
|
||||
elif isinstance(value, list) and isinstance(kwargs[key], list):
|
||||
kwargs[key] = list(value) + kwargs[key]
|
||||
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
|
||||
set_defaults(value, kwargs[key])
|
||||
elif key in kwargs and value is None:
|
||||
kwargs[key] = None
|
||||
|
|
|
@ -815,6 +815,24 @@ class Match(object):
|
|||
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def tagged(self, *tags):
    """
    Tell whether this match carries any of the given tags.

    :param tags: tags to look for
    :return: True as soon as one of the tags is found in the tags of this match, False otherwise.
    """
    for tag in tags:
        if tag in self.tags:
            return True
    return False
|
||||
|
||||
def named(self, *names):
    """
    Tell whether any of the given names is part of the names of this match.

    :param names: names to look for
    :return: True as soon as one of the names is found in ``self.names``, False otherwise.
    """
    for name in names:
        if name in self.names:
            return True
    return False
|
||||
|
||||
def __len__(self):
|
||||
return self.end - self.start
|
||||
|
||||
|
|
|
@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
|
|||
import six
|
||||
|
||||
from . import debug
|
||||
from .formatters import default_formatter
|
||||
from .loose import call, ensure_list, ensure_dict
|
||||
from .match import Match
|
||||
from .remodule import re, REGEX_AVAILABLE
|
||||
from .utils import find_all, is_iterable, get_first_defined
|
||||
from .validators import allways_true
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(object):
|
||||
class BasePattern(object):
    """
    Common base for objects that behave like a pattern.
    """

    @abstractmethod
    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Compute every match of this pattern in the given input.

        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :param with_raw_matches: should return details
        :type with_raw_matches: dict
        :return: matches based on input_string for this pattern
        :rtype: iterator[Match]
        """
        return None
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(BasePattern):
|
||||
"""
|
||||
Definition of a particular pattern to search for.
|
||||
"""
|
||||
|
@ -25,7 +50,7 @@ class Pattern(object):
|
|||
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
|
||||
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
|
||||
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
|
||||
properties=None, post_processor=None, **kwargs):
|
||||
properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
|
||||
"""
|
||||
:param name: Name of this pattern
|
||||
:type name: str
|
||||
|
@ -66,15 +91,19 @@ class Pattern(object):
|
|||
:type disabled: bool|function
|
||||
:param log_lvl: Log level associated to this pattern
|
||||
:type log_lvl: int
|
||||
:param post_process: Post processing function
|
||||
:param post_processor: Post processing function
|
||||
:type post_processor: func
|
||||
:param pre_match_processor: Pre match processing function
|
||||
:type pre_match_processor: func
|
||||
:param post_match_processor: Post match processing function
|
||||
:type post_match_processor: func
|
||||
"""
|
||||
# pylint:disable=too-many-locals,unused-argument
|
||||
self.name = name
|
||||
self.tags = ensure_list(tags)
|
||||
self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
|
||||
self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
|
||||
self.values, self._default_value = ensure_dict(value, None)
|
||||
self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
|
||||
self.validators, self._default_validator = ensure_dict(validator, allways_true)
|
||||
self.every = every
|
||||
self.children = children
|
||||
self.private = private
|
||||
|
@ -96,6 +125,14 @@ class Pattern(object):
|
|||
self.post_processor = None
|
||||
else:
|
||||
self.post_processor = post_processor
|
||||
if not callable(pre_match_processor):
|
||||
self.pre_match_processor = None
|
||||
else:
|
||||
self.pre_match_processor = pre_match_processor
|
||||
if not callable(post_match_processor):
|
||||
self.post_match_processor = None
|
||||
else:
|
||||
self.post_match_processor = post_match_processor
|
||||
|
||||
@property
|
||||
def log_level(self):
|
||||
|
@ -106,83 +143,6 @@ class Pattern(object):
|
|||
"""
|
||||
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
|
||||
|
||||
def _yield_children(self, match):
|
||||
"""
|
||||
Does this match has children
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return match.children and (self.children or self.every)
|
||||
|
||||
def _yield_parent(self):
|
||||
"""
|
||||
Does this mat
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return not self.children or self.every
|
||||
|
||||
def _match_parent(self, match, yield_parent):
|
||||
"""
|
||||
Handle a parent match
|
||||
:param match:
|
||||
:type match:
|
||||
:param yield_parent:
|
||||
:type yield_parent:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not match or match.value == "":
|
||||
return False
|
||||
|
||||
pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
|
||||
self._default_value)
|
||||
if pattern_value:
|
||||
match.value = pattern_value
|
||||
|
||||
if yield_parent or self.format_all:
|
||||
match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
|
||||
self._default_formatter)
|
||||
if yield_parent or self.validate_all:
|
||||
validator = get_first_defined(self.validators, [match.name, '__parent__', None],
|
||||
self._default_validator)
|
||||
if validator and not validator(match):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _match_child(self, child, yield_children):
|
||||
"""
|
||||
Handle a children match
|
||||
:param child:
|
||||
:type child:
|
||||
:param yield_children:
|
||||
:type yield_children:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not child or child.value == "":
|
||||
return False
|
||||
|
||||
pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
|
||||
self._default_value)
|
||||
if pattern_value:
|
||||
child.value = pattern_value
|
||||
|
||||
if yield_children or self.format_all:
|
||||
child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
|
||||
self._default_formatter)
|
||||
|
||||
if yield_children or self.validate_all:
|
||||
validator = get_first_defined(self.validators, [child.name, '__children__', None],
|
||||
self._default_validator)
|
||||
if validator and not validator(child):
|
||||
return False
|
||||
return True
|
||||
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
|
||||
"""
|
||||
Computes all matches for a given input
|
||||
|
@ -200,41 +160,168 @@ class Pattern(object):
|
|||
|
||||
matches = []
|
||||
raw_matches = []
|
||||
|
||||
for pattern in self.patterns:
|
||||
yield_parent = self._yield_parent()
|
||||
match_index = -1
|
||||
match_index = 0
|
||||
for match in self._match(pattern, input_string, context):
|
||||
match_index += 1
|
||||
match.match_index = match_index
|
||||
raw_matches.append(match)
|
||||
yield_children = self._yield_children(match)
|
||||
if not self._match_parent(match, yield_parent):
|
||||
continue
|
||||
validated = True
|
||||
for child in match.children:
|
||||
if not self._match_child(child, yield_children):
|
||||
validated = False
|
||||
break
|
||||
if validated:
|
||||
if self.private_parent:
|
||||
match.private = True
|
||||
if self.private_children:
|
||||
for child in match.children:
|
||||
child.private = True
|
||||
if yield_parent or self.private_parent:
|
||||
matches.append(match)
|
||||
if yield_children or self.private_children:
|
||||
for child in match.children:
|
||||
child.match_index = match_index
|
||||
matches.append(child)
|
||||
matches = self._matches_post_process(matches)
|
||||
self._matches_privatize(matches)
|
||||
self._matches_ignore(matches)
|
||||
matches.extend(self._process_matches(match, match_index))
|
||||
match_index += 1
|
||||
|
||||
matches = self._post_process_matches(matches)
|
||||
|
||||
if with_raw_matches:
|
||||
return matches, raw_matches
|
||||
return matches
|
||||
|
||||
def _matches_post_process(self, matches):
|
||||
@property
|
||||
def _should_include_children(self):
|
||||
"""
|
||||
Check if children matches from this pattern should be included in matches results.
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self.children or self.every
|
||||
|
||||
@property
|
||||
def _should_include_parent(self):
|
||||
"""
|
||||
Check if a match from this pattern should be included in matches results.
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return not self.children or self.every
|
||||
|
||||
@staticmethod
|
||||
def _match_config_property_keys(match, child=False):
|
||||
if match.name:
|
||||
yield match.name
|
||||
if child:
|
||||
yield '__children__'
|
||||
else:
|
||||
yield '__parent__'
|
||||
yield None
|
||||
|
||||
@staticmethod
|
||||
def _process_match_index(match, match_index):
|
||||
"""
|
||||
Process match index from this pattern process state.
|
||||
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
match.match_index = match_index
|
||||
|
||||
def _process_match_private(self, match, child=False):
|
||||
"""
|
||||
Process match privacy from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:param child:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if match.name and match.name in self.private_names or \
|
||||
not child and self.private_parent or \
|
||||
child and self.private_children:
|
||||
match.private = True
|
||||
|
||||
def _process_match_value(self, match, child=False):
|
||||
"""
|
||||
Process match value from this pattern configuration.
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
pattern_value = get_first_defined(self.values, keys, self._default_value)
|
||||
if pattern_value:
|
||||
match.value = pattern_value
|
||||
|
||||
def _process_match_formatter(self, match, child=False):
|
||||
"""
|
||||
Process match formatter from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
included = self._should_include_children if child else self._should_include_parent
|
||||
if included or self.format_all:
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
|
||||
|
||||
def _process_match_validator(self, match, child=False):
|
||||
"""
|
||||
Process match validation from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:return: True if match is validated by the configured validator, False otherwise.
|
||||
"""
|
||||
included = self._should_include_children if child else self._should_include_parent
|
||||
if included or self.validate_all:
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
validator = get_first_defined(self.validators, keys, self._default_validator)
|
||||
if validator and not validator(match):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _process_match(self, match, match_index, child=False):
|
||||
"""
|
||||
Process match from this pattern by setting all properties from defined configuration
|
||||
(index, private, value, formatter, validator, ...).
|
||||
|
||||
:param match:
|
||||
:type match:
|
||||
:return: True if match is validated by the configured validator, False otherwise.
|
||||
:rtype:
|
||||
"""
|
||||
self._process_match_index(match, match_index)
|
||||
self._process_match_private(match, child)
|
||||
self._process_match_value(match, child)
|
||||
self._process_match_formatter(match, child)
|
||||
return self._process_match_validator(match, child)
|
||||
|
||||
@staticmethod
|
||||
def _process_match_processor(match, processor):
|
||||
if processor:
|
||||
ret = processor(match)
|
||||
if ret is not None:
|
||||
return ret
|
||||
return match
|
||||
|
||||
def _process_matches(self, match, match_index):
|
||||
"""
|
||||
Process and generate all matches for the given unprocessed match.
|
||||
:param match:
|
||||
:param match_index:
|
||||
:return: Processed and dispatched matches.
|
||||
"""
|
||||
match = self._process_match_processor(match, self.pre_match_processor)
|
||||
if not match:
|
||||
return
|
||||
|
||||
if not self._process_match(match, match_index):
|
||||
return
|
||||
|
||||
for child in match.children:
|
||||
if not self._process_match(child, match_index, child=True):
|
||||
return
|
||||
|
||||
match = self._process_match_processor(match, self.post_match_processor)
|
||||
if not match:
|
||||
return
|
||||
|
||||
if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
|
||||
yield match
|
||||
if self._should_include_children or self.private_children:
|
||||
children = [x for x in match.children if x.name not in self.ignore_names]
|
||||
for child in children:
|
||||
yield child
|
||||
|
||||
def _post_process_matches(self, matches):
|
||||
"""
|
||||
Post process matches with user defined function
|
||||
:param matches:
|
||||
|
@ -246,32 +333,6 @@ class Pattern(object):
|
|||
return self.post_processor(matches, self)
|
||||
return matches
|
||||
|
||||
def _matches_privatize(self, matches):
|
||||
"""
|
||||
Mark matches included in private_names with private flag.
|
||||
:param matches:
|
||||
:type matches:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self.private_names:
|
||||
for match in matches:
|
||||
if match.name in self.private_names:
|
||||
match.private = True
|
||||
|
||||
def _matches_ignore(self, matches):
|
||||
"""
|
||||
Ignore matches included in ignore_names.
|
||||
:param matches:
|
||||
:type matches:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self.ignore_names:
|
||||
for match in list(matches):
|
||||
if match.name in self.ignore_names:
|
||||
matches.remove(match)
|
||||
|
||||
@abstractproperty
|
||||
def patterns(self): # pragma: no cover
|
||||
"""
|
||||
|
@ -306,7 +367,7 @@ class Pattern(object):
|
|||
@abstractmethod
|
||||
def _match(self, pattern, input_string, context=None): # pragma: no cover
|
||||
"""
|
||||
Computes all matches for a given pattern and input
|
||||
Computes all unprocessed matches for a given pattern and input.
|
||||
|
||||
:param pattern: the pattern to use
|
||||
:param input_string: the string to parse
|
||||
|
@ -350,7 +411,9 @@ class StringPattern(Pattern):
|
|||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
for index in find_all(input_string, pattern, **self._kwargs):
|
||||
yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
if match:
|
||||
yield match
|
||||
|
||||
|
||||
class RePattern(Pattern):
|
||||
|
@ -411,15 +474,18 @@ class RePattern(Pattern):
|
|||
for start, end in match_object.spans(i):
|
||||
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
|
||||
input_string=input_string, **self._children_match_kwargs)
|
||||
main_match.children.append(child_match)
|
||||
if child_match:
|
||||
main_match.children.append(child_match)
|
||||
else:
|
||||
start, end = match_object.span(i)
|
||||
if start > -1 and end > -1:
|
||||
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
|
||||
input_string=input_string, **self._children_match_kwargs)
|
||||
main_match.children.append(child_match)
|
||||
if child_match:
|
||||
main_match.children.append(child_match)
|
||||
|
||||
yield main_match
|
||||
if main_match:
|
||||
yield main_match
|
||||
|
||||
|
||||
class FunctionalPattern(Pattern):
|
||||
|
@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
|
|||
if self._match_kwargs:
|
||||
options = self._match_kwargs.copy()
|
||||
options.update(args)
|
||||
yield Match(pattern=self, input_string=input_string, **options)
|
||||
match = Match(pattern=self, input_string=input_string, **options)
|
||||
if match:
|
||||
yield match
|
||||
else:
|
||||
kwargs = self._match_kwargs
|
||||
if isinstance(args[-1], dict):
|
||||
kwargs = dict(kwargs)
|
||||
kwargs.update(args[-1])
|
||||
args = args[:-1]
|
||||
yield Match(*args, pattern=self, input_string=input_string, **kwargs)
|
||||
match = Match(*args, pattern=self, input_string=input_string, **kwargs)
|
||||
if match:
|
||||
yield match
|
||||
|
||||
|
||||
def filter_match_kwargs(kwargs, children=False):
|
||||
|
|
|
@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
|
|||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .builder import Builder
|
||||
from .match import Matches
|
||||
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
from .chain import Chain
|
||||
|
||||
from .processors import ConflictSolver, PrivateRemover
|
||||
from .loose import set_defaults
|
||||
from .utils import extend_safe
|
||||
from .rules import Rules
|
||||
from .utils import extend_safe
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
class Rebulk(object):
|
||||
class Rebulk(Builder):
|
||||
r"""
|
||||
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
|
||||
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
|
||||
|
@ -44,6 +40,7 @@ class Rebulk(object):
|
|||
>>> bulk.matches("the lakers are from la")
|
||||
[<lakers:(4, 10)>, <la:(20, 22)>]
|
||||
"""
|
||||
|
||||
# pylint:disable=protected-access
|
||||
|
||||
def __init__(self, disabled=lambda context: False, default_rules=True):
|
||||
|
@ -56,6 +53,7 @@ class Rebulk(object):
|
|||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
super(Rebulk, self).__init__()
|
||||
if not callable(disabled):
|
||||
self.disabled = lambda context: disabled
|
||||
else:
|
||||
|
@ -64,11 +62,6 @@ class Rebulk(object):
|
|||
self._rules = Rules()
|
||||
if default_rules:
|
||||
self.rules(ConflictSolver, PrivateRemover)
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
self._chain_defaults = {}
|
||||
self._rebulks = []
|
||||
|
||||
def pattern(self, *pattern):
|
||||
|
@ -83,172 +76,6 @@ class Rebulk(object):
|
|||
self._patterns.extend(pattern)
|
||||
return self
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for regex patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._regex_defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_re(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_re(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new regular expression pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return RePattern(*pattern, **kwargs)
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._string_defaults = kwargs
|
||||
return self
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_string(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._string_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return StringPattern(*pattern, **kwargs)
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._functional_defaults = kwargs
|
||||
return self
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_functional(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return FunctionalPattern(*pattern, **kwargs)
|
||||
|
||||
def chain_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for patterns chain.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._chain_defaults = kwargs
|
||||
return self
|
||||
|
||||
def chain(self, **kwargs):
|
||||
"""
|
||||
Add patterns chain, using configuration of this rebulk
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
chain = self.build_chain(**kwargs)
|
||||
self._patterns.append(chain)
|
||||
return chain
|
||||
|
||||
def build_chain(self, **kwargs):
|
||||
"""
|
||||
Builds a new patterns chain
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._chain_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return Chain(self, **kwargs)
|
||||
|
||||
def rules(self, *rules):
|
||||
"""
|
||||
Add rules as a module, class or instance.
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
import re
|
||||
|
||||
from functools import partial
|
||||
|
||||
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
|
||||
from ..rebulk import Rebulk
|
||||
from ..validators import chars_surround
|
||||
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
|
||||
|
||||
|
||||
def test_chain_close():
|
||||
|
@ -63,18 +63,61 @@ def test_build_chain():
|
|||
|
||||
def test_chain_defaults():
|
||||
rebulk = Rebulk()
|
||||
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
|
||||
rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
|
||||
|
||||
rebulk.chain()\
|
||||
rebulk.chain() \
|
||||
.regex("(?P<test>test)") \
|
||||
.regex(" ").repeater("*") \
|
||||
.regex("(?P<best>best)") \
|
||||
.regex(" ").repeater("*") \
|
||||
.regex("(?P<testIgnore>testIgnore)")
|
||||
matches = rebulk.matches("test testIgnore")
|
||||
matches = rebulk.matches("test best testIgnore")
|
||||
|
||||
assert len(matches) == 1
|
||||
assert matches[0].name == "test"
|
||||
|
||||
|
||||
def test_chain_with_validators():
|
||||
def chain_validator(match):
|
||||
return match.value.startswith('t') and match.value.endswith('t')
|
||||
|
||||
def default_validator(match):
|
||||
return match.value.startswith('t') and match.value.endswith('g')
|
||||
|
||||
def custom_validator(match):
|
||||
return match.value.startswith('b') and match.value.endswith('t')
|
||||
|
||||
rebulk = Rebulk()
|
||||
rebulk.defaults(children=True, validator=default_validator)
|
||||
|
||||
rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
|
||||
.regex("(?P<test>testing)", validator=default_validator).repeater("+") \
|
||||
.regex(" ").repeater("+") \
|
||||
.regex("(?P<best>best)", validator=custom_validator).repeater("+")
|
||||
matches = rebulk.matches("some testing best end")
|
||||
|
||||
assert len(matches) == 2
|
||||
assert matches[0].name == "test"
|
||||
assert matches[1].name == "best"
|
||||
|
||||
|
||||
def test_matches_docs():
|
||||
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
|
||||
.defaults(children=True, formatter={'episode': int, 'version': int}) \
|
||||
.chain() \
|
||||
.regex(r'e(?P<episode>\d{1,4})').repeater(1) \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
|
||||
.close() # .repeater(1) could be omitted as it's the default behavior
|
||||
|
||||
result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict
|
||||
|
||||
assert 'episode' in result
|
||||
assert result['episode'] == [14, 15, 16, 17]
|
||||
assert 'version' in result
|
||||
assert result['version'] == 2
|
||||
|
||||
|
||||
def test_matches():
|
||||
rebulk = Rebulk()
|
||||
|
||||
|
@ -144,8 +187,8 @@ def test_matches():
|
|||
def test_matches_2():
|
||||
rebulk = Rebulk() \
|
||||
.regex_defaults(flags=re.IGNORECASE) \
|
||||
.chain(children=True, formatter={'episode': int}) \
|
||||
.defaults(formatter={'version': int}) \
|
||||
.defaults(children=True, formatter={'episode': int, 'version': int}) \
|
||||
.chain() \
|
||||
.regex(r'e(?P<episode>\d{1,4})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
|
||||
|
@ -173,25 +216,32 @@ def test_matches_2():
|
|||
def test_matches_3():
|
||||
alt_dash = (r'@', r'[\W_]') # abbreviation
|
||||
|
||||
rebulk = Rebulk()
|
||||
match_names = ['season', 'episode']
|
||||
other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
|
||||
|
||||
rebulk.chain(formatter={'season': int, 'episode': int},
|
||||
tags=['SxxExx'],
|
||||
abbreviations=[alt_dash],
|
||||
private_names=['episodeSeparator', 'seasonSeparator'],
|
||||
children=True,
|
||||
private_parent=True,
|
||||
conflict_solver=lambda match, other: match
|
||||
if match.name in ['season', 'episode'] and other.name in
|
||||
['screen_size', 'video_codec', 'audio_codec',
|
||||
'audio_channels', 'container', 'date']
|
||||
else '__default__') \
|
||||
rebulk = Rebulk()
|
||||
rebulk.defaults(formatter={'season': int, 'episode': int},
|
||||
tags=['SxxExx'],
|
||||
abbreviations=[alt_dash],
|
||||
private_names=['episodeSeparator', 'seasonSeparator'],
|
||||
children=True,
|
||||
private_parent=True,
|
||||
conflict_solver=lambda match, other: match
|
||||
if match.name in match_names and other.name in other_names
|
||||
else '__default__')
|
||||
|
||||
rebulk.chain() \
|
||||
.defaults(children=True, private_parent=True) \
|
||||
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
|
||||
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
|
||||
.close() \
|
||||
.chain() \
|
||||
.defaults(children=True, private_parent=True) \
|
||||
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
|
||||
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
|
||||
.close() \
|
||||
.chain() \
|
||||
.defaults(children=True, private_parent=True) \
|
||||
.regex(r'S(?P<season>\d+)') \
|
||||
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
|
||||
|
||||
|
@ -240,11 +290,11 @@ def test_matches_4():
|
|||
|
||||
rebulk = Rebulk()
|
||||
rebulk.regex_defaults(flags=re.IGNORECASE)
|
||||
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
|
||||
validator={'__parent__': seps_surround}, children=True, private_parent=True)
|
||||
rebulk.defaults(validate_all=True, children=True)
|
||||
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
|
||||
|
||||
rebulk.chain(formatter={'episode': int, 'version': int}) \
|
||||
.defaults(validator=None) \
|
||||
rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
|
||||
.defaults(formatter={'episode': int, 'version': int}) \
|
||||
.regex(r'e(?P<episode>\d{1,4})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
|
||||
|
@ -262,11 +312,11 @@ def test_matches_5():
|
|||
|
||||
rebulk = Rebulk()
|
||||
rebulk.regex_defaults(flags=re.IGNORECASE)
|
||||
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
|
||||
validator={'__parent__': seps_surround}, children=True, private_parent=True)
|
||||
|
||||
rebulk.chain(formatter={'episode': int, 'version': int}) \
|
||||
.defaults(validator=None) \
|
||||
rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
|
||||
validator={'__parent__': seps_surround}, children=True, private_parent=True,
|
||||
formatter={'episode': int, 'version': int}) \
|
||||
.defaults(children=True, private_parent=True) \
|
||||
.regex(r'e(?P<episode>\d{1,4})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
|
||||
|
@ -288,7 +338,7 @@ def test_matches_6():
|
|||
validator=None, children=True, private_parent=True)
|
||||
|
||||
rebulk.chain(formatter={'episode': int, 'version': int}) \
|
||||
.defaults(validator=None) \
|
||||
.defaults(children=True, private_parent=True) \
|
||||
.regex(r'e(?P<episode>\d{1,4})') \
|
||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
||||
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
|
||||
|
|
|
@ -2,19 +2,15 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
|
||||
|
||||
from .default_rules_module import RuleRemove0
|
||||
from .. import debug
|
||||
from ..match import Match
|
||||
from ..pattern import StringPattern
|
||||
from ..rebulk import Rebulk
|
||||
from ..match import Match
|
||||
from .. import debug
|
||||
from .default_rules_module import RuleRemove0
|
||||
|
||||
|
||||
class TestDebug(object):
|
||||
|
||||
|
||||
#request.addfinalizer(disable_debug)
|
||||
|
||||
|
||||
# request.addfinalizer(disable_debug)
|
||||
|
||||
debug.DEBUG = True
|
||||
pattern = StringPattern(1, 3, value="es")
|
||||
|
@ -38,43 +34,43 @@ class TestDebug(object):
|
|||
debug.DEBUG = False
|
||||
|
||||
def test_pattern(self):
|
||||
assert self.pattern.defined_at.lineno == 20
|
||||
assert self.pattern.defined_at.lineno > 0
|
||||
assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
|
||||
assert self.pattern.defined_at.filename.endswith('test_debug.py')
|
||||
|
||||
assert str(self.pattern.defined_at) == 'test_debug.py#L20'
|
||||
assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
|
||||
assert str(self.pattern.defined_at).startswith('test_debug.py#L')
|
||||
assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
|
||||
|
||||
def test_match(self):
|
||||
assert self.match.defined_at.lineno == 22
|
||||
assert self.match.defined_at.lineno > 0
|
||||
assert self.match.defined_at.name == 'rebulk.test.test_debug'
|
||||
assert self.match.defined_at.filename.endswith('test_debug.py')
|
||||
|
||||
assert str(self.match.defined_at) == 'test_debug.py#L22'
|
||||
assert str(self.match.defined_at).startswith('test_debug.py#L')
|
||||
|
||||
def test_rule(self):
|
||||
assert self.rule.defined_at.lineno == 23
|
||||
assert self.rule.defined_at.lineno > 0
|
||||
assert self.rule.defined_at.name == 'rebulk.test.test_debug'
|
||||
assert self.rule.defined_at.filename.endswith('test_debug.py')
|
||||
|
||||
assert str(self.rule.defined_at) == 'test_debug.py#L23'
|
||||
assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
|
||||
assert str(self.rule.defined_at).startswith('test_debug.py#L')
|
||||
assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
|
||||
|
||||
def test_rebulk(self):
|
||||
"""
|
||||
This test fails on travis CI, can't find out why there's 1 line offset ...
|
||||
"""
|
||||
assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
|
||||
assert self.rebulk._patterns[0].defined_at.lineno > 0
|
||||
assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
|
||||
assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
|
||||
|
||||
assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
|
||||
assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
|
||||
|
||||
assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
|
||||
assert self.rebulk._patterns[1].defined_at.lineno > 0
|
||||
assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
|
||||
assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
|
||||
|
||||
assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
|
||||
assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
|
||||
|
||||
assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
|
||||
assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at
|
||||
|
|
|
@ -116,6 +116,9 @@ class TestMatchesClass(object):
|
|||
assert "tag1" in matches.tags
|
||||
assert "tag2" in matches.tags
|
||||
|
||||
assert self.match3.tagged("tag1")
|
||||
assert not self.match3.tagged("start")
|
||||
|
||||
tag1 = matches.tagged("tag1")
|
||||
assert len(tag1) == 2
|
||||
assert tag1[0] == self.match2
|
||||
|
|
|
@ -62,9 +62,20 @@ def validators(*chained_validators):
|
|||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
|
||||
def validator_chain(match): # pylint:disable=missing-docstring
|
||||
for chained_validator in chained_validators:
|
||||
if not chained_validator(match):
|
||||
return False
|
||||
return True
|
||||
|
||||
return validator_chain
|
||||
|
||||
|
||||
def allways_true(match): # pylint:disable=unused-argument
|
||||
"""
|
||||
A validator which is always true
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
return True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue