mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 10:36:52 -07:00
Move common libs to libs/common
This commit is contained in:
parent
8dbb1a2451
commit
1f4bd41bcc
1612 changed files with 962 additions and 10 deletions
10
libs/common/rebulk/__init__.py
Normal file
10
libs/common/rebulk/__init__.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Define simple search patterns in bulk to perform advanced matching on any string.
|
||||
"""
|
||||
# pylint:disable=import-self
|
||||
from .rebulk import Rebulk
|
||||
from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags
|
||||
from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS
|
||||
from .pattern import REGEX_AVAILABLE
|
7
libs/common/rebulk/__version__.py
Normal file
7
libs/common/rebulk/__version__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
# Version string of this rebulk package.
__version__ = '1.0.0'
|
467
libs/common/rebulk/chain.py
Normal file
467
libs/common/rebulk/chain.py
Normal file
|
@ -0,0 +1,467 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Chain patterns and handle repeating capture groups
|
||||
"""
|
||||
# pylint: disable=super-init-not-called
|
||||
import itertools
|
||||
|
||||
from .loose import call, set_defaults
|
||||
from .match import Match, Matches
|
||||
from .pattern import Pattern, filter_match_kwargs
|
||||
from .remodule import re
|
||||
|
||||
|
||||
class _InvalidChainException(Exception):
    """
    Raised internally while matching when the chain being built turns out to be invalid.
    """
|
||||
|
||||
|
||||
class Chain(Pattern):
    """
    Definition of a pattern chain to search for.

    A chain holds an ordered list of :class:`ChainPart` objects, each wrapping one pattern created through the
    ``rebulk`` builder given at construction. Parts are matched one after the other on the input string and the
    visible matches of all parts are gathered as children of a single chain match.
    """

    def __init__(self, rebulk, chain_breaker=None, **kwargs):
        """
        :param rebulk: builder used to create the patterns of each part and returned by :meth:`close`
        :type rebulk:
        :param chain_breaker: optional callable receiving the ``Matches`` collected so far; a true result
            prevents the candidate matches from being added to the chain. Non-callable values are ignored.
        :type chain_breaker: callable
        :param kwargs: keyword arguments forwarded to the ``Pattern`` constructor and reused as defaults for
            every pattern added to this chain
        :type kwargs:
        """
        call(super(Chain, self).__init__, **kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        if callable(chain_breaker):
            self.chain_breaker = chain_breaker
        else:
            self.chain_breaker = None
        self.rebulk = rebulk
        self.parts = []

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        The given keyword arguments replace any defaults previously registered with this method.

        :param kwargs:
        :type kwargs:
        :return: this chain, to allow call chaining
        :rtype: Chain
        """
        self._defaults = kwargs
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regex patterns.

        :param kwargs:
        :type kwargs:
        :return: this chain, to allow call chaining
        :rtype: Chain
        """
        self._regex_defaults = kwargs
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs:
        :type kwargs:
        :return: this chain, to allow call chaining
        :rtype: Chain
        """
        self._string_defaults = kwargs
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs:
        :type kwargs:
        :return: this chain, to allow call chaining
        :rtype: Chain
        """
        self._functional_defaults = kwargs
        return self

    def chain(self):
        """
        Add a new patterns chain, using the configuration of this chain.

        The new chain is created through ``self.rebulk.chain`` with this chain's keyword arguments and
        receives a copy of each defaults dict.

        :return: the new chain
        :rtype:
        """
        # pylint: disable=protected-access
        chain = self.rebulk.chain(**self._kwargs)
        chain._defaults = dict(self._defaults)
        chain._regex_defaults = dict(self._regex_defaults)
        chain._functional_defaults = dict(self._functional_defaults)
        chain._string_defaults = dict(self._string_defaults)
        return chain

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern.

        Chain keyword arguments, regex defaults and global defaults are merged, in that order, into
        ``kwargs`` with ``set_defaults`` before the pattern is built.

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: the chain part wrapping the new pattern
        :rtype: ChainPart
        """
        set_defaults(self._kwargs, kwargs)
        set_defaults(self._regex_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_re(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        self.parts.append(part)
        return part

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern.

        Chain keyword arguments, functional defaults and global defaults are merged, in that order, into
        ``kwargs`` with ``set_defaults`` before the pattern is built.

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: the chain part wrapping the new pattern
        :rtype: ChainPart
        """
        set_defaults(self._kwargs, kwargs)
        set_defaults(self._functional_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_functional(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        self.parts.append(part)
        return part

    def string(self, *pattern, **kwargs):
        """
        Add string pattern.

        Chain keyword arguments, string defaults and global defaults are merged, in that order, into
        ``kwargs`` with ``set_defaults`` before the pattern is built.

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: the chain part wrapping the new pattern
        :rtype: ChainPart
        """
        set_defaults(self._kwargs, kwargs)
        # String patterns must use the string defaults (not the functional ones) so that
        # string_defaults() actually takes effect.
        set_defaults(self._string_defaults, kwargs)
        set_defaults(self._defaults, kwargs)
        pattern = self.rebulk.build_string(*pattern, **kwargs)
        part = ChainPart(self, pattern)
        self.parts.append(part)
        return part

    def close(self):
        """
        Close chain builder to continue registering other patterns.

        :return: the builder given at construction
        :rtype:
        """
        return self.rebulk

    def _match(self, pattern, input_string, context=None):
        """
        Search the input string for occurrences of the whole chain.

        :param pattern: not used by this implementation
        :type pattern:
        :param input_string: string to search in
        :type input_string: str
        :param context:
        :type context:
        :return: one chain match per valid chain found
        :rtype: list
        """
        # pylint: disable=too-many-locals,too-many-nested-blocks
        chain_matches = []
        chain_input_string = input_string
        offset = 0
        while offset < len(input_string):
            chain_found = False
            current_chain_matches = []
            valid_chain = True
            is_chain_start = True
            for chain_part in self.parts:
                try:
                    chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
                                                                                         chain_input_string,
                                                                                         context)

                    # Part patterns ran on the remaining input only: shift their spans back onto input_string.
                    Chain._fix_matches_offset(chain_part_matches, input_string, offset)
                    Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)

                    if raw_chain_part_matches:
                        grouped_matches_dict = dict()
                        for match_index, match in itertools.groupby(chain_part_matches,
                                                                    lambda m: m.match_index):
                            grouped_matches_dict[match_index] = list(match)

                        grouped_raw_matches_dict = dict()
                        for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
                                                                        lambda m: m.match_index):
                            grouped_raw_matches_dict[match_index] = list(raw_match)

                        for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
                            chain_found = True
                            # Next part (or repetition) starts right after the last raw match of this group.
                            offset = grouped_raw_matches[-1].raw_end
                            chain_input_string = input_string[offset:]
                            if not chain_part.is_hidden:
                                grouped_matches = grouped_matches_dict.get(match_index, [])
                                if self._chain_breaker_eval(current_chain_matches + grouped_matches):
                                    current_chain_matches.extend(grouped_matches)

                except _InvalidChainException:
                    valid_chain = False
                    if current_chain_matches:
                        # Retry the chain right after the first match collected for this attempt.
                        offset = current_chain_matches[0].raw_end
                    break
                is_chain_start = False
            if not chain_found:
                break
            if current_chain_matches and valid_chain:
                match = self._build_chain_match(current_chain_matches, input_string)
                chain_matches.append(match)

        return chain_matches

    def _match_parent(self, match, yield_parent):
        """
        Handle a parent match.

        When the parent implementation rejects the match, the trailing children are removed one
        ``match_index`` group at a time (last pattern first) and the parent implementation is retried on
        the shortened match. If no shortened match is accepted, children and end are restored.

        :param match:
        :type match:
        :param yield_parent:
        :type yield_parent:
        :return: result of the parent implementation, or True as soon as a shortened match is accepted
        :rtype:
        """
        ret = super(Chain, self)._match_parent(match, yield_parent)
        original_children = Matches(match.children)
        original_end = match.end
        while not ret and match.children:
            last_pattern = match.children[-1].pattern
            last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
            last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
            last_pattern_groups = {}
            for index, matches in last_pattern_groups_iter:
                last_pattern_groups[index] = list(matches)

            for index in reversed(list(last_pattern_groups)):
                last_matches = list(last_pattern_groups[index])
                for last_match in last_matches:
                    match.children.remove(last_match)
                match.end = match.children[-1].end if match.children else match.start
                ret = super(Chain, self)._match_parent(match, yield_parent)
                if ret:
                    return True
        match.children = original_children
        match.end = original_end
        return ret

    def _build_chain_match(self, current_chain_matches, input_string):
        """
        Build the chain match spanning all given part matches and attach them as its children.

        :param current_chain_matches: matches collected for one chain occurrence
        :type current_chain_matches: list
        :param input_string:
        :type input_string: str
        :return: the chain match
        :rtype: Match
        """
        start = None
        end = None
        for match in current_chain_matches:
            if start is None or start > match.start:
                start = match.start
            if end is None or end < match.end:
                end = match.end
        match = call(Match, start, end, pattern=self, input_string=input_string, **self._match_kwargs)
        for chain_match in current_chain_matches:
            if chain_match.children:
                for child in chain_match.children:
                    match.children.append(child)
            if chain_match not in match.children:
                match.children.append(chain_match)
                chain_match.parent = match
        return match

    def _chain_breaker_eval(self, matches):
        """
        Tell whether the given matches may be added to the chain.

        :param matches: matches collected so far plus the candidate ones
        :type matches: list
        :return: True when there is no chain breaker or when it returns a false value
        :rtype: bool
        """
        return not self.chain_breaker or not self.chain_breaker(Matches(matches))

    @staticmethod
    def _fix_matches_offset(chain_part_matches, input_string, offset):
        """
        Rebind matches (and, recursively, their children) found on a substring to the full input string.

        Only matches whose input string differs from ``input_string`` are rebound and shifted by ``offset``.
        """
        for chain_part_match in chain_part_matches:
            if chain_part_match.input_string != input_string:
                chain_part_match.input_string = input_string
                chain_part_match.end += offset
                chain_part_match.start += offset
            if chain_part_match.children:
                Chain._fix_matches_offset(chain_part_match.children, input_string, offset)

    @staticmethod
    def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
        """
        Match a single chain part on the remaining input.

        :return: (matches, raw matches), both truncated to the contiguous leading run allowed by the part
        :rtype: tuple
        :raise _InvalidChainException: when the part has fewer repetitions than its repeater requires
        """
        chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
                                                                                with_raw_matches=True)
        chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
                                                                chain_input_string)
        raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
                                                                    chain_input_string)

        Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
        return chain_part_matches, raw_chain_part_matches

    @staticmethod
    def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
        """
        Keep only the leading matches that follow each other without any separator.

        When the part is not the first of the chain, the first match must also start at the very beginning
        of ``chain_input_string``, otherwise no match is kept. Matches whose ``match_index`` reaches the
        part's ``repeater_end`` are dropped.
        """
        if not chain_part_matches:
            return chain_part_matches

        if not is_chain_start:
            separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
            if separator:
                return []

        j = 1
        for i in range(0, len(chain_part_matches) - 1):
            separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
                                           chain_part_matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = chain_part_matches[:j]
        if chain_part.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
        return truncated

    @staticmethod
    def _validate_chain_part_matches(chain_part_matches, chain_part):
        """
        Check that the part matched at least ``repeater_start`` times.

        :raise _InvalidChainException: when the highest ``match_index`` found (plus one) is too low
        """
        max_match_index = -1
        if chain_part_matches:
            max_match_index = max([m.match_index for m in chain_part_matches])
        if max_match_index + 1 < chain_part.repeater_start:
            raise _InvalidChainException

    @property
    def match_options(self):
        """
        Match options of the chain itself: always an empty dict.
        """
        return {}

    @property
    def patterns(self):
        """
        Patterns of the chain itself: a list containing only this chain.
        """
        return [self]

    def __repr__(self):
        defined = ""
        if self.defined_at:
            defined = "@%s" % (self.defined_at,)
        return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
|
||||
|
||||
|
||||
class ChainPart(object):
    """
    Part of a pattern chain.

    Wraps one pattern together with its repeater bounds and hidden flag, and proxies the builder methods
    of the owning chain so that calls can be chained fluently.
    """

    def __init__(self, chain, pattern):
        """
        :param chain: chain owning this part
        :type chain: Chain
        :param pattern: pattern matched by this part
        :type pattern:
        """
        self._chain = chain
        self.pattern = pattern
        # By default a part must match exactly once.
        self.repeater_start = 1
        self.repeater_end = 1
        self._hidden = False

    def chain(self):
        """
        Add patterns chain, using configuration from this chain

        :return:
        :rtype:
        """
        return self._chain.chain()

    def hidden(self, hidden=True):
        """
        Hide chain part results from global chain result

        :param hidden:
        :type hidden:
        :return: this part, to allow call chaining
        :rtype: ChainPart
        """
        self._hidden = hidden
        return self

    @property
    def is_hidden(self):
        """
        Check if the chain part is hidden

        :return:
        :rtype:
        """
        return self._hidden

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.regex(*pattern, **kwargs)

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.functional(*pattern, **kwargs)

    def string(self, *pattern, **kwargs):
        """
        Add string pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.string(*pattern, **kwargs)

    def close(self):
        """
        Close the chain builder to continue registering other patterns

        :return:
        :rtype:
        """
        return self._chain.close()

    def repeater(self, value):
        """
        Define the repeater of the current chain part.

        Accepted values:

        - an integer (or a string ``int()`` can parse): exactly that many repetitions
        - ``'+'``: one or more
        - ``'*'``: zero or more
        - ``'?'``: zero or one
        - ``'{m,n}'``: between m and n; a missing m means 0 and a missing n means unbounded. Note that
          ``'{m}'`` without a comma is parsed as m or more.

        Any other string leaves the current bounds unchanged.

        :param value:
        :type value: int or str
        :return: this part, to allow call chaining
        :rtype: ChainPart
        """
        try:
            value = int(value)
            self.repeater_start = value
            self.repeater_end = value
            return self
        except ValueError:
            pass
        if value == '+':
            self.repeater_start = 1
            self.repeater_end = None
        elif value == '*':
            self.repeater_start = 0
            self.repeater_end = None
        elif value == '?':
            self.repeater_start = 0
            self.repeater_end = 1
        else:
            match = re.match(r'\{\s*(\d*)\s*,?\s*(\d*)\s*\}', value)
            if match:
                start = match.group(1)
                end = match.group(2)
                if start or end:
                    self.repeater_start = int(start) if start else 0
                    self.repeater_end = int(end) if end else None
        return self

    def __repr__(self):
        return "%s({%s,%s})" % (self.pattern, self.repeater_start, self.repeater_end)
|
56
libs/common/rebulk/debug.py
Normal file
56
libs/common/rebulk/debug.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Debug tools.
|
||||
|
||||
Can be configured by changing the values of these variables.
|
||||
|
||||
DEBUG = False
|
||||
Enable this variable to activate debug features (like defined_at parameters). It can slow down Rebulk
|
||||
|
||||
LOG_LEVEL = 0
|
||||
Default log level of generated rebulk logs.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
# When true, defined_at() records the location where patterns and matches are defined.
DEBUG = False
# Default log level of generated rebulk logs.
LOG_LEVEL = logging.DEBUG
|
||||
|
||||
|
||||
class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])):
    """
    Lightweight record describing one stack frame (line number, package, module name, file name).
    """
    __slots__ = ()

    def __repr__(self):
        # Short "<file>#L<line>" form: only the base name of the file is shown.
        short_name = os.path.basename(self.filename)
        return "{0}#L{1}".format(short_name, self.lineno)
|
||||
|
||||
|
||||
def defined_at():
    """
    Get definition location of a pattern or a match (outside of rebulk package).

    The call stack is walked from the current frame towards the callers until a frame whose
    ``__package__`` global differs from this module's package is found.

    :return: the first frame found outside of this package, or None when DEBUG is disabled or when no
        such frame exists
    :rtype: Frame
    """
    if not DEBUG:
        return None

    frame = inspect.currentframe()
    try:
        while frame:
            try:
                if frame.f_globals['__package__'] != __package__:
                    break
            except KeyError:  # pragma:no cover
                # No __package__ global in this frame (workaround for python 3.3): stop on this frame.
                break
            frame = frame.f_back

        if frame is None:
            # Walked past the outermost frame without leaving the package: nothing to report.
            return None

        return Frame(frame.f_lineno,
                     frame.f_globals.get('__package__'),
                     frame.f_globals.get('__name__'),
                     frame.f_code.co_filename)
    finally:
        # Drop the frame reference explicitly to avoid keeping a reference cycle alive.
        del frame
|
23
libs/common/rebulk/formatters.py
Normal file
23
libs/common/rebulk/formatters.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Formatter functions to use in patterns.
|
||||
|
||||
All of these functions take match.value (str) as their last argument.
|
||||
"""
|
||||
|
||||
|
||||
def formatters(*chained_formatters):
    """
    Compose several formatter functions into a single one.

    :param chained_formatters: formatters to apply, in order
    :type chained_formatters: callable
    :return: function feeding its argument through every formatter, each one receiving the previous result
    :rtype: callable
    """
    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        value = input_string
        for apply_formatter in chained_formatters:
            value = apply_formatter(value)
        return value

    return formatters_chain
|
126
libs/common/rebulk/introspector.py
Normal file
126
libs/common/rebulk/introspector.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Introspect rebulk object to retrieve capabilities.
|
||||
"""
|
||||
from abc import ABCMeta, abstractproperty
|
||||
from collections import defaultdict
|
||||
|
||||
import six
|
||||
from .pattern import StringPattern, RePattern, FunctionalPattern
|
||||
from .utils import extend_safe
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
class Description(object):
    """
    Abstract base for objects describing what a pattern, a rule or a whole rebulk can generate.
    """

    @abstractproperty
    def properties(self):  # pragma: no cover
        """
        Properties of described object.

        :return: all properties that described object can generate grouped by name.
        :rtype: dict
        """
|
||||
|
||||
|
||||
class PatternDescription(Description):
    """
    Description of a pattern: the property names it can generate and, when known, their values.
    """

    def __init__(self, pattern):  # pylint:disable=too-many-branches
        self.pattern = pattern
        self._properties = defaultdict(list)
        described = self._properties

        # Explicitly declared properties take precedence over anything inferred from the pattern.
        if pattern.properties:
            for name, declared_values in pattern.properties.items():
                extend_safe(described[name], declared_values)
            return

        # A fixed 'value' match option is the only value the pattern can produce.
        if 'value' in pattern.match_options:
            described[pattern.name].append(pattern.match_options['value'])
            return

        # String patterns produce their own strings.
        if isinstance(pattern, StringPattern):
            extend_safe(described[pattern.name], pattern.patterns)
            return

        if not isinstance(pattern, (RePattern, FunctionalPattern)):
            return

        # Values of regex and functional patterns are unknown: register the name with a None value.
        if pattern.name and pattern.name not in pattern.private_names:
            extend_safe(described[pattern.name], [None])

        # Named groups of a regex pattern are properties too, unless children are private.
        if isinstance(pattern, RePattern) and not pattern.private_children:
            for compiled in pattern.patterns:
                for group_name in compiled.groupindex:
                    if group_name not in pattern.private_names:
                        extend_safe(described[group_name], [None])

    @property
    def properties(self):
        """
        Properties for this pattern.

        :return: values grouped by property name
        :rtype: dict
        """
        return self._properties
|
||||
|
||||
|
||||
class RuleDescription(Description):
    """
    Description of a rule, built from the properties the rule declares.
    """

    def __init__(self, rule):
        self.rule = rule
        self._properties = defaultdict(list)

        declared = rule.properties
        if declared:
            for name, declared_values in declared.items():
                extend_safe(self._properties[name], declared_values)

    @property
    def properties(self):
        """
        Properties for this rule.

        :return: values grouped by property name
        :rtype: dict
        """
        return self._properties
|
||||
|
||||
|
||||
class Introspection(Description):
    """
    Introspection results: descriptions of the effective patterns and rules of a rebulk object.
    """

    def __init__(self, rebulk, context=None):
        self.patterns = []
        for pattern in rebulk.effective_patterns(context):
            # Private patterns and markers are left out of the description.
            if pattern.private or pattern.marker:
                continue
            self.patterns.append(PatternDescription(pattern))

        self.rules = []
        for rule in rebulk.effective_rules(context):
            self.rules.append(RuleDescription(rule))

    @property
    def properties(self):
        """
        Properties for Introspection results.

        :return: values of all pattern descriptions, then all rule descriptions, grouped by property name
        :rtype: dict
        """
        merged = defaultdict(list)
        for descriptions in (self.patterns, self.rules):
            for description in descriptions:
                for name, values in description.properties.items():
                    extend_safe(merged[name], values)
        return merged
|
||||
|
||||
|
||||
def introspect(rebulk, context=None):
    """
    Build the introspection of a Rebulk instance, i.e. its defined objects and the properties they can generate.

    :param rebulk: object to introspect
    :type rebulk: Rebulk
    :param context: context forwarded to the Introspection constructor
    :type context:
    :return: Introspection instance
    :rtype: Introspection
    """
    introspection = Introspection(rebulk, context)
    return introspection
|
236
libs/common/rebulk/loose.py
Normal file
236
libs/common/rebulk/loose.py
Normal file
|
@ -0,0 +1,236 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Various utility functions
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
|
||||
try:
|
||||
from inspect import getfullargspec as getargspec
|
||||
_fullargspec_supported = True
|
||||
except ImportError:
|
||||
_fullargspec_supported = False
|
||||
from inspect import getargspec
|
||||
|
||||
from .utils import is_iterable
|
||||
|
||||
def _constructor(class_):  # pragma: no cover
    """
    Retrieves constructor from given class

    Before Python 3.4 this is the ``__init__`` method of the class; from 3.4 on it is the class itself.

    :param class_:
    :type class_: class
    :return: constructor from given class
    :rtype: callable
    """
    if sys.version_info < (3, 4, 0):
        return class_.__init__
    return class_
|
||||
|
||||
|
||||
def call(function, *args, **kwargs):
    """
    Call a function or constructor, keeping only the args and kwargs that fit its signature.

    :param function: Function or constructor to call
    :type function: callable
    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return: same value as a direct call with the filtered arguments
    :rtype: object
    """
    if inspect.isclass(function):
        filter_arguments = constructor_args
    else:
        filter_arguments = function_args
    accepted_args, accepted_kwargs = filter_arguments(function, *args, **kwargs)
    return function(*accepted_args, **accepted_kwargs)
|
||||
|
||||
|
||||
def function_args(callable_, *args, **kwargs):
    """
    Filter args and kwargs down to those accepted by the signature of a plain callable.

    :param callable_: callable to inspect
    :type callable_: callable
    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    return argspec_args(getargspec(callable_), False, *args, **kwargs)  # pylint:disable=deprecated-method
|
||||
|
||||
|
||||
def constructor_args(class_, *args, **kwargs):
    """
    Filter args and kwargs down to those accepted by the constructor of a class.

    :param class_: class whose constructor is inspected
    :type class_: class
    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return: (args, kwargs) matching the constructor signature
    :rtype: tuple
    """
    constructor = _constructor(class_)
    return argspec_args(getargspec(constructor), True, *args, **kwargs)  # pylint:disable=deprecated-method
|
||||
|
||||
|
||||
def argspec_args(argspec, constructor, *args, **kwargs):
    """
    Filter args and kwargs down to those accepted by an argspec object.

    :param argspec: argspec to use (must expose ``args``, ``varargs`` and ``varkw``)
    :type argspec: argspec
    :param constructor: is it a constructor ? If so, one positional slot is reserved (not counted as usable)
    :type constructor: bool
    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    # Keyword arguments: everything when **kwargs is accepted, else only the declared names.
    if argspec.varkw:
        accepted_kwargs = kwargs
    else:
        accepted_kwargs = {}
        for name in kwargs:
            if name in argspec.args:
                accepted_kwargs[name] = kwargs[name]

    # Positional arguments: everything when *args is accepted, else as many as there are declared slots.
    if argspec.varargs:
        accepted_args = args
    else:
        reserved = 1 if constructor else 0
        accepted_args = args[:len(argspec.args) - reserved]

    return accepted_args, accepted_kwargs
|
||||
|
||||
|
||||
if not _fullargspec_supported:
    def argspec_args_legacy(argspec, constructor, *args, **kwargs):
        """
        Filter args and kwargs down to those accepted by a legacy argspec object.

        Same as argspec_args, for argspec objects exposing ``keywords`` instead of ``varkw``.

        :param argspec: argspec to use
        :type argspec: argspec
        :param constructor: is it a constructor ?
        :type constructor: bool
        :param args:
        :type args:
        :param kwargs:
        :type kwargs:
        :return: (args, kwargs) matching the function signature
        :rtype: tuple
        """
        if argspec.keywords:
            accepted_kwargs = kwargs
        else:
            accepted_kwargs = {}
            for name in kwargs:
                if name in argspec.args:
                    accepted_kwargs[name] = kwargs[name]

        if argspec.varargs:
            accepted_args = args
        else:
            reserved = 1 if constructor else 0
            accepted_args = args[:len(argspec.args) - reserved]

        return accepted_args, accepted_kwargs

    # The legacy implementation replaces the default one when getfullargspec is not available.
    argspec_args = argspec_args_legacy
|
||||
|
||||
|
||||
def ensure_list(param):
    """
    Turn the given parameter into something iterable.

    :param param: any value
    :type param:
    :return: a new empty list for a false value, the parameter itself when it is iterable, else a list
        containing only the parameter
    :rtype:
    """
    if not param:
        return []
    return param if is_iterable(param) else [param]
|
||||
|
||||
|
||||
def ensure_dict(param, default_value, default_key=None):
    """
    Retrieves a dict and a default value from given parameter.

    A false parameter is replaced by ``default_value``. If the result is not a dict, it is wrapped into
    a dict under ``default_key`` and, when true, promoted as the default value.

    :param param:
    :type param:
    :param default_value:
    :type default_value:
    :param default_key:
    :type default_key:
    :return: (dict, default value)
    :rtype: tuple
    """
    effective = param or default_value
    if isinstance(effective, dict):
        return effective, default_value
    return {default_key: effective}, effective or default_value
|
||||
|
||||
|
||||
def filter_index(collection, predicate=None, index=None):
    """
    Filter collection with predicate function and index.

    An integer given as ``predicate`` while ``index`` is None is used as the index.
    If index is not found, returns None.

    :param collection:
    :type collection: collection supporting iteration and slicing
    :param predicate: function to filter the collection with
    :type predicate: function
    :param index: position of a single element to retrieve
    :type index: int
    :return: filtered list, or single element of filtered list if index is defined
    :rtype: list or object
    """
    if index is None and isinstance(predicate, int):
        predicate, index = None, predicate

    # The filtered result is rebuilt with the class of the original collection.
    filtered = collection.__class__(filter(predicate, collection)) if predicate else collection

    if index is None:
        return filtered
    try:
        return filtered[index]
    except IndexError:
        return None
|
||||
|
||||
|
||||
def set_defaults(defaults, kwargs):
    """
    Merge the defaults dict into the kwargs dict, in place.

    - a key missing from kwargs receives the default, unless the default is None
    - when both values are lists, the default items are put before the kwargs items
    - when both values are dicts, they are merged recursively
    - a None default resets an existing kwargs value to None

    :param defaults:
    :type defaults: dict
    :param kwargs:
    :type kwargs: dict
    :return:
    :rtype:
    """
    for key, default in defaults.items():
        if key not in kwargs:
            if default is not None:
                kwargs[key] = default
            continue

        current = kwargs[key]
        if isinstance(default, list) and isinstance(current, list):
            kwargs[key] = list(default) + current
        elif isinstance(default, dict) and isinstance(current, dict):
            set_defaults(default, current)
        elif default is None:
            kwargs[key] = None
|
872
libs/common/rebulk/match.py
Normal file
872
libs/common/rebulk/match.py
Normal file
|
@ -0,0 +1,872 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Classes and functions related to matches
|
||||
"""
|
||||
import copy
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
try:
|
||||
from collections.abc import MutableSequence
|
||||
except ImportError:
|
||||
from collections import MutableSequence
|
||||
|
||||
try:
|
||||
from collections import OrderedDict # pylint:disable=ungrouped-imports
|
||||
except ImportError: # pragma: no cover
|
||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
import six
|
||||
|
||||
from .loose import ensure_list, filter_index
|
||||
from .utils import is_iterable
|
||||
from .debug import defined_at
|
||||
|
||||
|
||||
class MatchesDict(OrderedDict):
    """
    Ordered dict carrying two extra list-valued lookup tables, ``matches`` and ``values_list``.
    """

    def __init__(self):
        super(MatchesDict, self).__init__()
        # Both tables create an empty list on first access to a missing key.
        self.values_list = defaultdict(list)
        self.matches = defaultdict(list)
|
||||
|
||||
|
||||
class _BaseMatches(MutableSequence):
    """
    A custom list[Match] that automatically maintains name, tag, start, end and index lookup structures.

    Matches are stored in ``_delegate``. Each lookup dictionary is built lazily from ``_delegate`` the first
    time it is read, and is then kept up to date by ``_add_match`` and ``_remove_match``.
    """
    _base = list
    _base_add = _base.append
    _base_remove = _base.remove
    _base_extend = _base.extend

    def __init__(self, matches=None, input_string=None):  # pylint: disable=super-init-not-called
        """
        :param matches: initial matches to add
        :type matches: iterable[Match]
        :param input_string: the string matches refer to
        :type input_string: str
        """
        self.input_string = input_string
        self._max_end = 0
        self._delegate = []
        # Lookup dictionaries stay None until first requested (see the properties below).
        self.__name_dict = None
        self.__tag_dict = None
        self.__start_dict = None
        self.__end_dict = None
        self.__index_dict = None
        if matches:
            self.extend(matches)

    @property
    def _name_dict(self):
        """
        Lazily built dict of match name -> list of matches. Matches without a name are not indexed.
        """
        if self.__name_dict is None:
            self.__name_dict = defaultdict(_BaseMatches._base)
            # groupby only groups consecutive items; extending the defaultdict entry merges scattered groups.
            for name, values in itertools.groupby([m for m in self._delegate if m.name], lambda item: item.name):
                _BaseMatches._base_extend(self.__name_dict[name], values)

        return self.__name_dict

    @property
    def _start_dict(self):
        """
        Lazily built dict of start index -> list of matches starting at this index.
        """
        if self.__start_dict is None:
            self.__start_dict = defaultdict(_BaseMatches._base)
            for start, values in itertools.groupby(self._delegate, lambda item: item.start):
                _BaseMatches._base_extend(self.__start_dict[start], values)

        return self.__start_dict

    @property
    def _end_dict(self):
        """
        Lazily built dict of end index -> list of matches ending at this index.
        """
        if self.__end_dict is None:
            self.__end_dict = defaultdict(_BaseMatches._base)
            for end, values in itertools.groupby(self._delegate, lambda item: item.end):
                _BaseMatches._base_extend(self.__end_dict[end], values)

        return self.__end_dict

    @property
    def _tag_dict(self):
        """
        Lazily built dict of tag -> list of matches carrying this tag.
        """
        if self.__tag_dict is None:
            self.__tag_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                for tag in match.tags:
                    _BaseMatches._base_add(self.__tag_dict[tag], match)

        return self.__tag_dict

    @property
    def _index_dict(self):
        """
        Lazily built dict of position -> list of matches covering this position (start included, end excluded).
        """
        if self.__index_dict is None:
            self.__index_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                for index in range(*match.span):
                    _BaseMatches._base_add(self.__index_dict[index], match)

        return self.__index_dict

    def _add_match(self, match):
        """
        Add a match to every lookup dictionary that has already been built, and update the maximum end.

        :param match:
        :type match: Match
        """
        if self.__name_dict is not None:
            if match.name:
                _BaseMatches._base_add(self._name_dict[match.name], match)
        if self.__tag_dict is not None:
            for tag in match.tags:
                _BaseMatches._base_add(self._tag_dict[tag], match)
        if self.__start_dict is not None:
            _BaseMatches._base_add(self._start_dict[match.start], match)
        if self.__end_dict is not None:
            _BaseMatches._base_add(self._end_dict[match.end], match)
        if self.__index_dict is not None:
            for index in range(*match.span):
                _BaseMatches._base_add(self._index_dict[index], match)
        if match.end > self._max_end:
            self._max_end = match.end

    def _remove_match(self, match):
        """
        Remove a match from every lookup dictionary that has already been built, and recompute the maximum end
        when the removed match was defining it.

        The match is expected to be already removed from ``_delegate``.

        :param match:
        :type match: Match
        """
        if self.__name_dict is not None:
            if match.name:
                _BaseMatches._base_remove(self._name_dict[match.name], match)
        if self.__tag_dict is not None:
            for tag in match.tags:
                _BaseMatches._base_remove(self._tag_dict[tag], match)
        if self.__start_dict is not None:
            _BaseMatches._base_remove(self._start_dict[match.start], match)
        if self.__end_dict is not None:
            _BaseMatches._base_remove(self._end_dict[match.end], match)
        if self.__index_dict is not None:
            for index in range(*match.span):
                _BaseMatches._base_remove(self._index_dict[index], match)
        if match.end >= self._max_end:
            # Recompute from what is left. Entries with an empty list are skipped, because a defaultdict keeps
            # the key of a fully removed end; and the end index is not built here, so that a removal never
            # triggers a lazy build as a side effect.
            if self.__end_dict is not None:
                remaining_ends = [end for end, ending in self.__end_dict.items() if ending]
            else:
                remaining_ends = [remaining.end for remaining in self._delegate]
            self._max_end = max(remaining_ends) if remaining_ends else 0

    def previous(self, match, predicate=None, index=None):
        """
        Retrieves the nearest previous matches, i.e. matches ending at the closest position at or before
        the start of the given match.

        :param match:
        :type match: Match
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return:
        :rtype:
        """
        current = match.start
        while current > -1:
            previous_matches = self.ending(current)
            if previous_matches:
                return filter_index(previous_matches, predicate, index)
            current -= 1
        return filter_index(_BaseMatches._base(), predicate, index)

    def next(self, match, predicate=None, index=None):
        """
        Retrieves the nearest next matches, i.e. matches starting at the closest position strictly after
        the start of the given match.

        :param match:
        :type match: Match
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return:
        :rtype:
        """
        current = match.start + 1
        while current <= self._max_end:
            next_matches = self.starting(current)
            if next_matches:
                return filter_index(next_matches, predicate, index)
            current += 1
        return filter_index(_BaseMatches._base(), predicate, index)

    def named(self, name, predicate=None, index=None):
        """
        Retrieves matches that have the given name.

        :param name:
        :type name: str
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return: matches with this name
        :rtype: list[Match]
        """
        return filter_index(_BaseMatches._base(self._name_dict[name]), predicate, index)

    def tagged(self, tag, predicate=None, index=None):
        """
        Retrieves matches that have the given tag defined.

        :param tag:
        :type tag: str
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return: matches with this tag
        :rtype: list[Match]
        """
        return filter_index(_BaseMatches._base(self._tag_dict[tag]), predicate, index)

    def starting(self, start, predicate=None, index=None):
        """
        Retrieves matches that start at given index.

        :param start: the starting index
        :type start: int
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return: matches starting at this index
        :rtype: list[Match]
        """
        return filter_index(_BaseMatches._base(self._start_dict[start]), predicate, index)

    def ending(self, end, predicate=None, index=None):
        """
        Retrieves matches that end at given index.

        :param end: the ending index
        :type end: int
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return: matches ending at this index
        :rtype: list[Match]
        """
        return filter_index(_BaseMatches._base(self._end_dict[end]), predicate, index)

    def range(self, start=0, end=None, predicate=None, index=None):
        """
        Retrieves matches that overlap the given range, sorted from start to end.

        :param start: the starting index
        :type start: int
        :param end: the ending index (defaults to, and is capped by, ``max_end``)
        :type end: int
        :param predicate:
        :type predicate:
        :param index:
        :type index: int
        :return: matches overlapping the range
        :rtype: list[Match]
        """
        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)
        ret = _BaseMatches._base()
        for match in sorted(self):
            if match.start < end and match.end > start:
                ret.append(match)
        return filter_index(ret, predicate, index)

    def chain_before(self, position, seps, start=0, predicate=None, index=None):
        """
        Retrieves a list of chained matches, before position, matching predicate and separated by characters from seps
        only.

        :param position: an index, or an object with a ``start`` attribute
        :type position:
        :param seps:
        :type seps:
        :param start:
        :type start:
        :param predicate:
        :type predicate:
        :param index:
        :type index:
        :return:
        :rtype:
        """
        if hasattr(position, 'start'):
            position = position.start

        chain = _BaseMatches._base()
        position = min(self.max_end, position)

        for i in reversed(range(start, position)):
            index_matches = self.at_index(i)
            filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
            if filtered_matches:
                for chain_match in filtered_matches:
                    if chain_match not in chain:
                        chain.append(chain_match)
            elif self.input_string[i] not in seps:
                # A character that is neither matched nor a separator breaks the chain.
                break

        return filter_index(chain, predicate, index)

    def chain_after(self, position, seps, end=None, predicate=None, index=None):
        """
        Retrieves a list of chained matches, after position, matching predicate and separated by characters from seps
        only.

        :param position: an index, or an object with an ``end`` attribute
        :type position:
        :param seps:
        :type seps:
        :param end:
        :type end:
        :param predicate:
        :type predicate:
        :param index:
        :type index:
        :return:
        :rtype:
        """
        if hasattr(position, 'end'):
            position = position.end
        chain = _BaseMatches._base()

        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)

        for i in range(position, end):
            index_matches = self.at_index(i)
            filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
            if filtered_matches:
                for chain_match in filtered_matches:
                    if chain_match not in chain:
                        chain.append(chain_match)
            elif self.input_string[i] not in seps:
                # A character that is neither matched nor a separator breaks the chain.
                break

        return filter_index(chain, predicate, index)

    @property
    def max_end(self):
        """
        Retrieves the maximum index: the greatest of the input string length and the greatest match end,
        or only the greatest match end when there is no (or an empty) input string.

        :return:
        :rtype: int
        """
        return max(len(self.input_string), self._max_end) if self.input_string else self._max_end

    def _hole_start(self, position, ignore=None):
        """
        Retrieves the start of hole index from position.

        :param position:
        :type position:
        :param ignore:
        :type ignore:
        :return:
        :rtype:
        """
        for lindex in reversed(range(0, position)):
            for starting in self.starting(lindex):
                if not ignore or not ignore(starting):
                    return lindex
        return 0

    def _hole_end(self, position, ignore=None):
        """
        Retrieves the end of hole index from position.

        :param position:
        :type position:
        :param ignore:
        :type ignore:
        :return:
        :rtype:
        """
        for rindex in range(position, self.max_end):
            for starting in self.starting(rindex):
                if not ignore or not ignore(starting):
                    return rindex
        return self.max_end

    def holes(self, start=0, end=None, formatter=None, ignore=None, seps=None, predicate=None,
              index=None):  # pylint: disable=too-many-branches,too-many-locals
        """
        Retrieves new Match objects covering the parts of given range where no match is defined.

        :param start:
        :type start:
        :param end:
        :type end:
        :param formatter: formatter given to each created hole match
        :type formatter:
        :param ignore: function(match) returning True for matches that should be considered as absent
        :type ignore:
        :param seps: separator characters splitting holes; requires input_string to be defined
        :type seps:
        :param predicate:
        :type predicate:
        :param index:
        :type index:
        :return:
        :rtype:
        """
        assert self.input_string if seps else True, "input_string must be defined when using seps parameter"
        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)
        ret = _BaseMatches._base()
        hole = False
        rindex = start

        loop_start = self._hole_start(start, ignore)

        for rindex in range(loop_start, end):
            current = []
            for at_index in self.at_index(rindex):
                if not ignore or not ignore(at_index):
                    current.append(at_index)

            if seps and hole and self.input_string and self.input_string[rindex] in seps:
                # A separator closes the current hole match
                hole = False
                ret[-1].end = rindex
            else:
                if not current and not hole:
                    # Open a new hole match
                    hole = True
                    ret.append(Match(max(rindex, start), None, input_string=self.input_string, formatter=formatter))
                elif current and hole:
                    # Close current hole match
                    hole = False
                    ret[-1].end = rindex

        if ret and hole:
            # go to the next starting element ...
            ret[-1].end = min(self._hole_end(rindex, ignore), end)
        return filter_index(ret, predicate, index)

    def conflicting(self, match, predicate=None, index=None):
        """
        Retrieves a list of ``Match`` objects that conflicts with given match.

        :param match:
        :type match:
        :param predicate:
        :type predicate:
        :param index:
        :type index:
        :return:
        :rtype:
        """
        ret = _BaseMatches._base()

        for i in range(*match.span):
            for at_match in self.at_index(i):
                if at_match not in ret:
                    ret.append(at_match)

        # The given match is only collected when it is indexed in this collection and has a non empty span.
        if match in ret:
            ret.remove(match)

        return filter_index(ret, predicate, index)

    def at_match(self, match, predicate=None, index=None):
        """
        Retrieves a list of matches from given match.
        """
        return self.at_span(match.span, predicate, index)

    def at_span(self, span, predicate=None, index=None):
        """
        Retrieves a list of matches from given (start, end) tuple.

        Only the first and the last position of the span are looked up.
        """
        starting = self._index_dict[span[0]]
        ending = self._index_dict[span[1] - 1]

        merged = list(starting)
        for marker in ending:
            if marker not in merged:
                merged.append(marker)

        return filter_index(merged, predicate, index)

    def at_index(self, pos, predicate=None, index=None):
        """
        Retrieves a list of matches from given position
        """
        return filter_index(self._index_dict[pos], predicate, index)

    @property
    def names(self):
        """
        Retrieve all names.

        :return:
        """
        return self._name_dict.keys()

    @property
    def tags(self):
        """
        Retrieve all tags.

        :return:
        """
        return self._tag_dict.keys()

    def to_dict(self, details=False, first_value=False, enforce_list=False):
        """
        Converts matches to a dict object.

        :param details if True, values will be complete Match object, else it will be only string Match.value property
        :type details: bool
        :param first_value if True, only the first value will be kept. Else, multiple values will be set as a list in
        the dict.
        :type first_value: bool
        :param enforce_list: if True, value is wrapped in a list even when a single value is found. Else, list values
        are available under `values_list` property of the returned dict object.
        :type enforce_list: bool
        :return:
        :rtype: dict
        """
        ret = MatchesDict()
        for match in sorted(self):
            value = match if details else match.value
            ret.matches[match.name].append(match)
            if not enforce_list and value not in ret.values_list[match.name]:
                ret.values_list[match.name].append(value)
            if match.name in ret.keys():
                if not first_value:
                    if not isinstance(ret[match.name], list):
                        if ret[match.name] == value:
                            continue
                        ret[match.name] = [ret[match.name]]
                    else:
                        if value in ret[match.name]:
                            continue
                    ret[match.name].append(value)
            else:
                if enforce_list and not isinstance(value, list):
                    ret[match.name] = [value]
                else:
                    ret[match.name] = value
        return ret

    def clear(self):
        """
        Remove all matches.

        Defined for every Python version: the Python 2 ``MutableSequence`` has no ``clear``, and a single
        slice deletion leaves the same state as the Python 3 mixin.
        """
        del self[:]

    def __len__(self):
        return len(self._delegate)

    def __getitem__(self, index):
        ret = self._delegate[index]
        if isinstance(ret, list):
            # index was a slice
            return Matches(ret)
        return ret

    def __setitem__(self, index, match):
        if isinstance(index, slice):
            # Materialize once: the value is both stored and iterated for indexing.
            match = list(match)
        replaced = self._delegate[index]
        self._delegate[index] = match
        if isinstance(index, slice):
            # Replaced matches must leave the lookup structures, or they would still be returned by queries.
            for replaced_item in replaced:
                self._remove_match(replaced_item)
            for match_item in match:
                self._add_match(match_item)
            return
        self._remove_match(replaced)
        self._add_match(match)

    def __delitem__(self, index):
        match = self._delegate[index]
        del self._delegate[index]
        if isinstance(match, list):
            # if index is a slice, we have a match list
            for match_item in match:
                self._remove_match(match_item)
        else:
            self._remove_match(match)

    def __repr__(self):
        return self._delegate.__repr__()

    def insert(self, index, value):
        self._delegate.insert(index, value)
        self._add_match(value)
|
||||
|
||||
|
||||
class Matches(_BaseMatches):
    """
    List of regular (non-marker) Match objects.

    Marker matches are kept apart, in the ``markers`` attribute.
    """

    def __init__(self, matches=None, input_string=None):
        """
        :param matches: initial matches to add
        :type matches: iterable[Match]
        :param input_string: the string matches refer to
        :type input_string: str
        """
        # The markers container is created first, then the base constructor adds the given matches.
        self.markers = Markers(None, input_string)
        super(Matches, self).__init__(matches, input_string)

    def _add_match(self, match):
        # Refuse marker matches, then let the base class update its lookup structures.
        assert not match.marker, "A marker match should not be added to <Matches> object"
        super(Matches, self)._add_match(match)
|
||||
|
||||
|
||||
class Markers(_BaseMatches):
    """
    A custom list[Match] containing markers list.
    """

    def __init__(self, matches=None, input_string=None):
        """
        :param matches: initial marker matches to add
        :type matches: iterable[Match]
        :param input_string: the string markers refer to
        :type input_string: str
        """
        # Forward the given matches: they were previously dropped by a hard-coded ``matches=None``.
        super(Markers, self).__init__(matches=matches, input_string=input_string)

    def _add_match(self, match):
        # Only marker matches are accepted here.
        assert match.marker, "A non-marker match should not be added to <Markers> object"
        super(Markers, self)._add_match(match)
|
||||
|
||||
|
||||
class Match(object):
    """
    Object storing values related to a single match
    """

    def __init__(self, start, end, value=None, name=None, tags=None, marker=None, parent=None, private=None,
                 pattern=None, input_string=None, formatter=None, conflict_solver=None, **kwargs):
        """
        :param start: start index of the match
        :type start: int
        :param end: end index of the match
        :type end: int
        :param value: hardcoded value; when falsy, the value is computed from the raw string
        :param name: name of the match
        :type name: str
        :param tags: tags of the match
        :param marker: is this match a marker
        :param parent: parent match
        :type parent: Match
        :param private: is this match private
        :param pattern: pattern that generated this match
        :param input_string: the string this match refers to
        :type input_string: str
        :param formatter: function(raw) computing the value from the raw string
        :param conflict_solver:
        :param kwargs: ignored
        """
        # pylint: disable=unused-argument
        self.start = start
        self.end = end
        self.name = name
        self._value = value
        self.tags = ensure_list(tags)
        self.marker = marker
        self.parent = parent
        self.input_string = input_string
        self.formatter = formatter
        self.pattern = pattern
        self.private = private
        self.conflict_solver = conflict_solver
        self._children = None
        self._raw_start = None
        self._raw_end = None
        self.defined_at = pattern.defined_at if pattern else defined_at()

    @property
    def span(self):
        """
        2-tuple with start and end indices of the match
        """
        return self.start, self.end

    @property
    def children(self):
        """
        Children matches. An empty Matches object is created on first access.
        """
        if self._children is None:
            self._children = Matches(None, self.input_string)
        return self._children

    @children.setter
    def children(self, value):
        self._children = value

    @property
    def value(self):
        """
        Get the value of the match, using formatter if defined.

        A falsy hardcoded value is considered as not defined.

        :return:
        :rtype:
        """
        if self._value:
            return self._value
        if self.formatter:
            return self.formatter(self.raw)
        return self.raw

    @value.setter
    def value(self, value):
        """
        Set the value (hardcode)

        :param value:
        :type value:
        :return:
        :rtype:
        """
        self._value = value  # pylint: disable=attribute-defined-outside-init

    @property
    def names(self):
        """
        Get all names of children, or the name of this match when it has no children.

        :return:
        :rtype: set
        """
        if not self.children:
            return {self.name}
        ret = set()
        for child in self.children:
            for name in child.names:
                ret.add(name)
        return ret

    @property
    def raw_start(self):
        """
        start index of raw value; defaults to the match start

        :return:
        :rtype:
        """
        if self._raw_start is None:
            return self.start
        return self._raw_start

    @raw_start.setter
    def raw_start(self, value):
        """
        Set start index of raw value

        :return:
        :rtype:
        """
        self._raw_start = value

    @property
    def raw_end(self):
        """
        end index of raw value; defaults to the match end

        :return:
        :rtype:
        """
        if self._raw_end is None:
            return self.end
        return self._raw_end

    @raw_end.setter
    def raw_end(self, value):
        """
        Set end index of raw value

        :return:
        :rtype:
        """
        self._raw_end = value

    @property
    def raw(self):
        """
        Get the raw value of the match, without using hardcoded value nor formatter.

        :return: the raw string, or None when input_string is not defined or empty
        :rtype:
        """
        if self.input_string:
            return self.input_string[self.raw_start:self.raw_end]
        return None

    @property
    def initiator(self):
        """
        Retrieve the initiator parent of a match: the topmost ancestor, or the match itself when it has no parent.

        :return:
        :rtype:
        """
        match = self
        while match.parent:
            match = match.parent
        return match

    def crop(self, crops, predicate=None, index=None):
        """
        crop the match with given Match objects or spans tuples

        :param crops:
        :type crops: list or object
        :return: a list of Match objects
        :rtype: list[Match]
        """
        # A single crop (object, or (start, end) tuple of ints) is wrapped in a list.
        if not is_iterable(crops) or len(crops) == 2 and isinstance(crops[0], int):
            crops = [crops]
        initial = copy.deepcopy(self)
        ret = [initial]
        for crop in crops:
            if hasattr(crop, 'span'):
                start, end = crop.span
            else:
                start, end = crop
            # Iterate on a copy, as ret is modified inside the loop.
            for current in list(ret):
                if start <= current.start and end >= current.end:
                    # self is included in crop, remove current ...
                    ret.remove(current)
                elif start >= current.start and end <= current.end:
                    # crop is included in self, split current ...
                    right = copy.deepcopy(current)
                    current.end = start
                    if not current:
                        # left part is empty
                        ret.remove(current)
                    right.start = end
                    if right:
                        ret.append(right)
                elif current.end >= end > current.start:
                    current.start = end
                elif current.start <= start < current.end:
                    current.end = start
        return filter_index(ret, predicate, index)

    def split(self, seps, predicate=None, index=None):
        """
        Split this match in multiple matches using given separators.

        :param seps:
        :type seps: string containing separator characters
        :return: list of new Match objects
        :rtype: list
        """
        split_match = copy.deepcopy(self)
        current_match = split_match
        ret = []

        for i in range(0, len(self.raw)):
            if self.raw[i] in seps:
                # split_match is None once the pending part was emitted; a zero length match is also falsy.
                if not split_match:
                    split_match = copy.deepcopy(current_match)
                    current_match.end = self.start + i

            else:
                if split_match:
                    split_match.start = self.start + i
                    current_match = split_match
                    ret.append(split_match)
                    split_match = None

        return filter_index(ret, predicate, index)

    def __len__(self):
        return self.end - self.start

    def __hash__(self):
        return hash(Match) + hash(self.start) + hash(self.end) + hash(self.value)

    def __eq__(self, other):
        if isinstance(other, Match):
            return self.span == other.span and self.value == other.value and self.name == other.name and \
                self.parent == other.parent
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, Match):
            return self.span != other.span or self.value != other.value or self.name != other.name or \
                self.parent != other.parent
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, Match):
            return self.span < other.span
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, Match):
            return self.span > other.span
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, Match):
            return self.span <= other.span
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, Match):
            return self.span >= other.span
        return NotImplemented

    def __repr__(self):
        flags = ""
        name = ""
        tags = ""
        defined = ""
        initiator = ""
        if self.initiator.value != self.value:
            # %s formatting: the initiator value is not always a string (hardcoded or formatted values).
            initiator = "+initiator=%s" % (self.initiator.value,)
        if self.private:
            flags += '+private'
        if self.name:
            name = "+name=%s" % (self.name,)
        if self.tags:
            tags = "+tags=%s" % (self.tags,)
        if self.defined_at:
            defined += "@%s" % (self.defined_at,)
        return "<%s:%s%s%s%s%s%s>" % (self.value, self.span, flags, name, tags, initiator, defined)
|
489
libs/common/rebulk/pattern.py
Normal file
489
libs/common/rebulk/pattern.py
Normal file
|
@ -0,0 +1,489 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Abstract pattern class definition along with various implementations (regexp, string, functional)
|
||||
"""
|
||||
# pylint: disable=super-init-not-called,wrong-import-position
|
||||
|
||||
from abc import ABCMeta, abstractmethod, abstractproperty
|
||||
|
||||
import six
|
||||
|
||||
from . import debug
|
||||
from .loose import call, ensure_list, ensure_dict
|
||||
from .match import Match
|
||||
from .remodule import re, REGEX_AVAILABLE
|
||||
from .utils import find_all, is_iterable, get_first_defined
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(object):
|
||||
"""
|
||||
Definition of a particular pattern to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
             private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
             marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
             properties=None, post_processor=None, **kwargs):
    """
    :param name: Name of this pattern
    :type name: str
    :param tags: List of tags related to this pattern
    :type tags: list[str]
    :param formatter: dict (name, func) of formatter to use with this pattern. name is the match name to support,
    and func a function(input_string) that returns the formatted string. A single formatter function can also be
    passed as a shortcut for {None: formatter}. The returned formatted string will be set in Match.value property.
    :type formatter: dict[str, func] || func
    :param value: dict (name, value) of value to use with this pattern. name is the match name to support,
    and value an object for the match value. A single object value can also be
    passed as a shortcut for {None: value}. The value will be set in Match.value property.
    :type value: dict[str, object] || object
    :param validator: dict (name, func) of validator to use with this pattern. name is the match name to support,
    and func a function(match) that returns a boolean. A single validator function can also be
    passed as a shortcut for {None: validator}. If return value is False, match will be ignored.
    :type validator: dict[str, func] || func
    :param children: generates children instead of parent
    :type children: bool
    :param every: generates both parent and children.
    :type every: bool
    :param private: flag this pattern as being private.
    :type private: bool
    :param private_parent: force return of parent and flag parent matches as private.
    :type private_parent: bool
    :param private_children: force return of children and flag children matches as private.
    :type private_children: bool
    :param private_names: force return of named matches as private.
    :type private_names: bool
    :param ignore_names: drop some named matches after validation.
    :type ignore_names: bool
    :param marker: flag this pattern as being a marker.
    :type marker: bool
    :param format_all: if True, pattern will format every match in the hierarchy (even match not yield).
    :type format_all: bool
    :param validate_all: if True, pattern will validate every match in the hierarchy (even match not yield).
    :type validate_all: bool
    :param disabled: if True, this pattern is disabled. Can also be a function(context).
    :type disabled: bool|function
    :param log_level: Log level associated to this pattern
    :type log_level: int
    :param properties: stored as is in ``_properties``
    :param post_processor: Post processing function; ignored when not callable
    :type post_processor: func
    :param kwargs: ignored
    """
    # pylint:disable=too-many-locals,unused-argument
    self.name = name
    self.tags = ensure_list(tags)

    # Each of these is a (dict, default) pair built from a dict or from a single shortcut value.
    self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
    self.values, self._default_value = ensure_dict(value, None)
    self.validators, self._default_validator = ensure_dict(validator, lambda match: True)

    # Which matches are generated
    self.every = every
    self.children = children
    self.marker = marker

    # Privacy and filtering
    self.private = private
    self.private_names = private_names or []
    self.ignore_names = ignore_names or []
    self.private_parent = private_parent
    self.private_children = private_children

    self.format_all = format_all
    self.validate_all = validate_all

    # A plain value is wrapped, so that ``disabled`` can always be called with a context.
    self.disabled = disabled if callable(disabled) else (lambda context: disabled)

    self._log_level = log_level
    self._properties = properties
    self.defined_at = debug.defined_at()

    self.post_processor = post_processor if callable(post_processor) else None
|
||||
|
||||
@property
def log_level(self):
    """
    Log level for this pattern.

    :return: the level given to the constructor, or ``debug.LOG_LEVEL`` when none was given
    :rtype:
    """
    if self._log_level is None:
        return debug.LOG_LEVEL
    return self._log_level
|
||||
|
||||
def _yield_children(self, match):
    """
    Tell whether children of this match should be generated.

    :param match:
    :type match:
    :return: the (falsy) children of the match when it has none, else ``self.children or self.every``
    :rtype:
    """
    match_children = match.children
    if not match_children:
        return match_children
    return self.children or self.every
|
||||
|
||||
def _yield_parent(self):
    """
    Tell whether the parent match should be generated.

    :return: True when ``children`` is not set, else the value of ``every``
    :rtype:
    """
    if self.children:
        return self.every
    return True
|
||||
|
||||
def _match_parent(self, match, yield_parent):
    """
    Handle a parent match: apply the pattern value, set the formatter and run the validator.

    :param match:
    :type match:
    :param yield_parent: True when the parent match is generated
    :type yield_parent:
    :return: False when the match is empty or rejected by its validator, else True
    :rtype: bool
    """
    if not match or match.value == "":
        return False

    # Lookup order: the match name, then the generic parent key, then the shortcut key.
    lookup_keys = [match.name, '__parent__', None]

    forced_value = get_first_defined(self.values, lookup_keys, self._default_value)
    if forced_value:
        match.value = forced_value

    if yield_parent or self.format_all:
        match.formatter = get_first_defined(self.formatters, lookup_keys, self._default_formatter)

    if yield_parent or self.validate_all:
        validator = get_first_defined(self.validators, lookup_keys, self._default_validator)
        if validator and not validator(match):
            return False
    return True
|
||||
|
||||
def _match_child(self, child, yield_children):
    """
    Handle a child match: apply the pattern value, set the formatter and run the validator.

    :param child:
    :type child:
    :param yield_children: truthy when children matches are generated
    :type yield_children:
    :return: False when the child is empty or rejected by its validator, else True
    :rtype: bool
    """
    if not child or child.value == "":
        return False

    # Lookup order: the child name, then the generic children key, then the shortcut key.
    lookup_keys = [child.name, '__children__', None]

    forced_value = get_first_defined(self.values, lookup_keys, self._default_value)
    if forced_value:
        child.value = forced_value

    if yield_children or self.format_all:
        child.formatter = get_first_defined(self.formatters, lookup_keys, self._default_formatter)

    if yield_children or self.validate_all:
        validator = get_first_defined(self.validators, lookup_keys, self._default_validator)
        if validator and not validator(child):
            return False
    return True
|
||||
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
    """
    Compute all matches of this pattern for a given input.

    Every base pattern of ``self.patterns`` is run through ``self._match``. Each raw match receives its
    ``match_index`` (position among the raw matches of the same base pattern), is handled as a parent,
    then its children are handled one by one; the first rejected child discards the whole match.
    Accepted matches and/or their children are collected, post processed, privatized and filtered.

    :param input_string: the string to parse
    :type input_string: str
    :param context: the context
    :type context: dict
    :param with_raw_matches: when truthy, also return the raw matches produced by ``_match``
    :type with_raw_matches: bool
    :return: the matches for this pattern, or a ``(matches, raw_matches)`` tuple when
        ``with_raw_matches`` is truthy
    :rtype: list[Match] or tuple
    """
    # pylint: disable=too-many-branches
    found = []
    raw_found = []
    for base_pattern in self.patterns:
        yield_parent = self._yield_parent()
        for position, match in enumerate(self._match(base_pattern, input_string, context)):
            match.match_index = position
            raw_found.append(match)
            yield_children = self._yield_children(match)

            if not self._match_parent(match, yield_parent):
                continue
            # all() stops at the first rejected child, like the explicit loop-and-break it replaces.
            if not all(self._match_child(child, yield_children) for child in match.children):
                continue

            if self.private_parent:
                match.private = True
            if self.private_children:
                for child in match.children:
                    child.private = True

            if yield_parent or self.private_parent:
                found.append(match)
            if yield_children or self.private_children:
                for child in match.children:
                    child.match_index = position
                    found.append(child)

    found = self._matches_post_process(found)
    self._matches_privatize(found)
    self._matches_ignore(found)
    return (found, raw_found) if with_raw_matches else found
|
||||
|
||||
def _matches_post_process(self, matches):
    """
    Post process matches with the user defined ``post_processor`` function, if any.

    :param matches: matches computed by this pattern
    :type matches: list[Match]
    :return: the value returned by ``self.post_processor(matches, self)`` when a post processor is set,
        the given matches unchanged otherwise
    :rtype: list[Match]
    """
    if not self.post_processor:
        return matches
    return self.post_processor(matches, self)
|
||||
|
||||
def _matches_privatize(self, matches):
    """
    Set the private flag on every match whose name is listed in ``self.private_names``.

    :param matches: matches to flag in place
    :type matches: list[Match]
    :return: nothing, matches are modified in place
    :rtype: None
    """
    if not self.private_names:
        return
    for candidate in matches:
        if candidate.name in self.private_names:
            candidate.private = True
|
||||
|
||||
def _matches_ignore(self, matches):
    """
    Remove from ``matches`` every match whose name is listed in ``self.ignore_names``.

    :param matches: matches to filter in place
    :type matches: list[Match]
    :return: nothing, matches are modified in place
    :rtype: None
    """
    if not self.ignore_names:
        return
    # Iterate over a snapshot because the list is modified while scanning it.
    for candidate in list(matches):
        if candidate.name in self.ignore_names:
            matches.remove(candidate)
|
||||
|
||||
@abstractproperty
def patterns(self):  # pragma: no cover
    """
    Base patterns defined for this pattern object.

    Abstract: concrete pattern classes provide the actual list.

    :return: A list of base patterns
    :rtype: list
    """
|
||||
|
||||
@property
def properties(self):
    """
    Property names and values that can be retrieved by this pattern.

    :return: ``self._properties`` when it is truthy, a new empty dict otherwise
    :rtype: dict
    """
    return self._properties or {}
|
||||
|
||||
@abstractproperty
def match_options(self):  # pragma: no cover
    """
    Default options for the Match objects generated by this pattern.

    Abstract: concrete pattern classes provide the actual dict.

    :return: **options to pass to Match constructor
    :rtype: dict
    """
|
||||
|
||||
@abstractmethod
def _match(self, pattern, input_string, context=None):  # pragma: no cover
    """
    Compute all matches for a single base pattern and an input string.

    Abstract: concrete pattern classes implement the actual search.

    :param pattern: the pattern to use
    :param input_string: the string to parse
    :type input_string: str
    :param context: the context
    :type context: dict
    :return: matches based on input_string for this pattern
    :rtype: iterator[Match]
    """
|
||||
|
||||
def __repr__(self):
    """
    Build a debug representation: class name, optional ``@defined_at`` location, then the patterns.
    """
    location = "@%s" % (self.defined_at,) if self.defined_at else ""
    return "<%s%s:%s>" % (self.__class__.__name__, location, self.__repr__patterns__)
|
||||
|
||||
@property
def __repr__patterns__(self):
    """
    Patterns as displayed by ``__repr__``; by default the raw ``patterns`` of this object.
    """
    displayed = self.patterns
    return displayed
|
||||
|
||||
|
||||
class StringPattern(Pattern):
    """
    Pattern searching for one or many literal strings.
    """

    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the strings to search for
        :param kwargs: options given to the base constructor; they are also kept to be forwarded to
            ``find_all`` and, once filtered with ``filter_match_kwargs``, to every generated Match
        """
        super(StringPattern, self).__init__(**kwargs)
        self._patterns = patterns
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)

    @property
    def patterns(self):
        """The strings given to the constructor."""
        return self._patterns

    @property
    def match_options(self):
        """Keyword arguments passed to every generated Match."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Yield one Match per index reported by ``find_all`` for ``pattern`` in ``input_string``.

        :param pattern: the string to search for
        :param input_string: the string to parse
        :param context: unused by this implementation
        :return: matches spanning ``len(pattern)`` characters from each reported index
        :rtype: iterator[Match]
        """
        for begin in find_all(input_string, pattern, **self._kwargs):
            finish = begin + len(pattern)
            yield Match(begin, finish, pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
|
||||
|
||||
class RePattern(Pattern):
    """
    Definition of one or many regular expression patterns to search for.
    """

    def __init__(self, *patterns, **kwargs):
        """
        Compile the given patterns.

        A pattern can be given as a string (compiled through ``call(re.compile, ...)`` with the
        constructor keyword arguments), as a dict (used as keyword arguments of ``re.compile``), or as
        another iterable (used as positional arguments of ``re.compile``). Any other object is stored as is.

        :param patterns: the patterns to compile
        :param kwargs: pattern options; ``repeated_captures`` and ``abbreviations`` are read here
        :raise NotImplementedError: if repeated captures are requested while REGEX_AVAILABLE is falsy
        """
        super(RePattern, self).__init__(**kwargs)
        self.repeated_captures = REGEX_AVAILABLE
        if 'repeated_captures' in kwargs:
            self.repeated_captures = kwargs.get('repeated_captures')
        if self.repeated_captures and not REGEX_AVAILABLE:  # pragma: no cover
            raise NotImplementedError("repeated_capture is available only with regex module.")
        self.abbreviations = kwargs.get('abbreviations', [])
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._children_match_kwargs = filter_match_kwargs(kwargs, children=True)
        self._patterns = []
        for pattern in patterns:
            if isinstance(pattern, six.string_types):
                if self.abbreviations and pattern:
                    pattern = self._expand_abbreviations(pattern)
                pattern = call(re.compile, pattern, **self._kwargs)
            elif isinstance(pattern, dict):
                if self.abbreviations and 'pattern' in pattern:
                    # Work on a copy: the dict belongs to the caller and must not be modified, otherwise
                    # reusing it for another pattern would expand the abbreviations a second time.
                    pattern = dict(pattern)
                    pattern['pattern'] = self._expand_abbreviations(pattern['pattern'])
                pattern = re.compile(**pattern)
            elif hasattr(pattern, '__iter__'):
                pattern = re.compile(*pattern)
            self._patterns.append(pattern)

    def _expand_abbreviations(self, expression):
        """Return ``expression`` with every ``(key, replacement)`` abbreviation applied in order."""
        for key, replacement in self.abbreviations:
            expression = expression.replace(key, replacement)
        return expression

    @property
    def patterns(self):
        """The patterns built by the constructor."""
        return self._patterns

    @property
    def __repr__patterns__(self):
        """The ``pattern`` attribute of each stored pattern, as displayed by ``__repr__``."""
        return [pattern.pattern for pattern in self.patterns]

    @property
    def match_options(self):
        """Keyword arguments passed to every generated parent Match."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Yield one parent Match per regular expression match, with one child Match per captured group.

        Children are named after their group when the group is named, after the parent match otherwise.
        With ``repeated_captures``, every span returned by ``match_object.spans`` gives a child;
        otherwise only the span returned by ``match_object.span`` is used, and skipped when negative.

        :param pattern: the compiled pattern to use
        :param input_string: the string to parse
        :param context: unused by this implementation
        :return: parent matches, children being available in their ``children`` attribute
        :rtype: iterator[Match]
        """
        group_names = {index: name for name, index in pattern.groupindex.items()}
        for match_object in pattern.finditer(input_string):
            main_match = Match(match_object.start(), match_object.end(), pattern=self,
                               input_string=input_string, **self._match_kwargs)

            for group in range(1, pattern.groups + 1):
                name = group_names.get(group, main_match.name)
                if self.repeated_captures:
                    spans = match_object.spans(group)
                else:
                    span = match_object.span(group)
                    # A negative bound is used here as the marker of a skipped group.
                    spans = [span] if span[0] > -1 and span[1] > -1 else []
                for child_start, child_end in spans:
                    child_match = Match(child_start, child_end, name=name, parent=main_match, pattern=self,
                                        input_string=input_string, **self._children_match_kwargs)
                    main_match.children.append(child_match)

            yield main_match
|
||||
|
||||
|
||||
class FunctionalPattern(Pattern):
    """
    Pattern searching with one or many user supplied functions.
    """

    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the functions to invoke
        :param kwargs: options given to the base constructor; they are also kept to be forwarded to the
            functions and, once filtered with ``filter_match_kwargs``, to every generated Match
        """
        super(FunctionalPattern, self).__init__(**kwargs)
        self._patterns = patterns
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)

    @property
    def patterns(self):
        """The functions given to the constructor."""
        return self._patterns

    @property
    def match_options(self):
        """Keyword arguments passed to every generated Match."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Invoke ``pattern`` and convert what it returns into Match objects.

        A falsy result produces nothing. A non iterable result, a dict, or an indexable iterable whose
        first item is an int describes a single match; any other iterable describes several matches.
        A dict description is used as Match keyword arguments; any other description is used as Match
        positional arguments, a trailing dict being merged into the keyword arguments.

        :param pattern: the function to invoke
        :param input_string: the string to parse
        :param context: the context given to the function
        :return: matches built from the function result
        :rtype: iterator[Match]
        """
        ret = call(pattern, input_string, context, **self._kwargs)
        if not ret:
            return

        if not is_iterable(ret) or isinstance(ret, dict):
            single = True
        else:
            single = is_iterable(ret) and hasattr(ret, '__getitem__') and isinstance(ret[0], int)
        descriptions = [ret] if single else ret

        for description in descriptions:
            if isinstance(description, dict):
                # These two options are always supplied by this method.
                description.pop('input_string', None)
                description.pop('pattern', None)
                options = description
                if self._match_kwargs:
                    options = self._match_kwargs.copy()
                    options.update(description)
                yield Match(pattern=self, input_string=input_string, **options)
            else:
                options = self._match_kwargs
                if isinstance(description[-1], dict):
                    options = dict(options)
                    options.update(description[-1])
                    description = description[:-1]
                yield Match(*description, pattern=self, input_string=input_string, **options)
|
||||
|
||||
|
||||
def filter_match_kwargs(kwargs, children=False):
    """
    Build the keyword arguments usable for Match construction from pattern keyword arguments.

    The keys ``pattern``, ``start``, ``end``, ``parent``, ``formatter`` and ``value`` are always dropped;
    ``name`` is dropped as well when ``children`` is truthy. The given dict is left untouched.

    :param kwargs: keyword arguments to filter
    :type kwargs: dict
    :param children: flag to filter for children matches
    :type children: bool
    :return: A filtered copy of kwargs
    :rtype: dict
    """
    excluded = ['pattern', 'start', 'end', 'parent', 'formatter', 'value']
    if children:
        excluded.append('name')
    filtered = kwargs.copy()
    for key in excluded:
        filtered.pop(key, None)
    return filtered
|
107
libs/common/rebulk/processors.py
Normal file
107
libs/common/rebulk/processors.py
Normal file
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Processor functions
|
||||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .utils import IdentitySet
|
||||
|
||||
from .rules import Rule, RemoveMatch
|
||||
|
||||
# Shortcut to the ``log(level, msg, *args)`` method of this module's logger.
log = getLogger(__name__).log

# Value a conflict solver returns to defer the decision to the next conflict solver.
DEFAULT = '__default__'

# Priorities of the built-in rules defined in this module.
POST_PROCESS = -2048
PRE_PROCESS = 2048
|
||||
|
||||
|
||||
def _default_conflict_solver(match, conflicting_match):
    """
    Default conflict solver for matches: of two conflicting matches, designate the one having the
    shorter initiator.

    :param match: first match of the conflict
    :type match: Match
    :param conflicting_match: second match of the conflict
    :type conflicting_match: Match
    :return: the match having the shorter initiator, or None when both initiators have the same length
    :rtype: Match or None
    """
    own_size = len(match.initiator)
    other_size = len(conflicting_match.initiator)
    if other_size < own_size:
        return conflicting_match
    if own_size < other_size:
        return match
    return None
|
||||
|
||||
|
||||
class ConflictSolver(Rule):
    """
    Rule removing conflicting matches.
    """
    priority = PRE_PROCESS

    consequence = RemoveMatch

    @property
    def default_conflict_solver(self):  # pylint:disable=no-self-use
        """
        Conflict solver consulted when the solvers carried by the matches do not decide.
        """
        return _default_conflict_solver

    def when(self, matches, context):
        """
        Collect the public matches to remove because they conflict with another public match.

        For every pair of conflicting public matches, the solver of the conflicting match, then the
        solver of the current match, then the default solver are consulted in turn; a solver answering
        ``DEFAULT`` hands over to the next one, any other answer ends the consultation for that pair.

        :param matches: all matches found so far
        :type matches: Matches
        :param context: the context (not used here)
        :return: the matches to remove
        :rtype: IdentitySet
        """
        # pylint:disable=too-many-nested-blocks
        to_remove_matches = IdentitySet()

        candidates = sorted((match for match in matches if not match.private), key=len)

        for match in candidates:
            conflicts = matches.conflicting(match)
            if not conflicts:
                continue

            # keep the match only if it's the longest
            public_conflicts = sorted((other for other in conflicts if not other.private), key=len)

            for conflicting_match in public_conflicts:
                # Each entry is (solver, reverse); reverse swaps the two arguments given to the solver.
                solvers = [(self.default_conflict_solver, False)]
                if match.conflict_solver:
                    solvers.append((match.conflict_solver, False))
                if conflicting_match.conflict_solver:
                    solvers.append((conflicting_match.conflict_solver, True))

                for solver, reverse in reversed(solvers):
                    if reverse:
                        to_remove = solver(conflicting_match, match)
                    else:
                        to_remove = solver(match, conflicting_match)
                    if to_remove == DEFAULT:
                        continue
                    if to_remove and to_remove not in to_remove_matches:
                        pair = [match, conflicting_match]
                        pair.remove(to_remove)
                        to_keep = pair[0]

                        if to_keep not in to_remove_matches:
                            log(self.log_level, "Conflicting match %s will be removed in favor of match %s",
                                to_remove, to_keep)

                            to_remove_matches.add(to_remove)
                    break
        return to_remove_matches
|
||||
|
||||
|
||||
class PrivateRemover(Rule):
    """
    Rule removing the matches flagged as private.
    """
    priority = POST_PROCESS

    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :param matches: all matches found so far
        :param context: the context (not used here)
        :return: the matches having a truthy ``private`` attribute
        :rtype: list
        """
        return list(filter(lambda match: match.private, matches))
|
363
libs/common/rebulk/rebulk.py
Normal file
363
libs/common/rebulk/rebulk.py
Normal file
|
@ -0,0 +1,363 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Entry point functions and classes for Rebulk
|
||||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .match import Matches
|
||||
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
from .chain import Chain
|
||||
|
||||
from .processors import ConflictSolver, PrivateRemover
|
||||
from .loose import set_defaults
|
||||
from .utils import extend_safe
|
||||
from .rules import Rules
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
class Rebulk(object):
    r"""
    Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent
    API to chain ``string``, ``regex``, and ``functional`` methods to define various pattern types.

    .. code-block:: python

        >>> from rebulk import Rebulk
        >>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25))

    When the ``Rebulk`` object is fully configured, you can call the ``matches`` method with an input string to
    retrieve all ``Match`` objects found by the registered patterns.

    .. code-block:: python

        >>> bulk.matches("The quick brown fox jumps over the lazy dog")
        [<brown:(10, 15)>, <quick:(4, 9)>, <jumps:(20, 25)>]

    If multiple ``Match`` objects are found at the same position, only the longer one is kept.

    .. code-block:: python

        >>> bulk = Rebulk().string('lakers').string('la')
        >>> bulk.matches("the lakers are from la")
        [<lakers:(4, 10)>, <la:(20, 22)>]
    """
    # pylint:disable=protected-access

    def __init__(self, disabled=lambda context: False, default_rules=True):
        """
        Creates a new Rebulk object.

        :param disabled: if True, this rebulk is disabled. Can also be a function(context).
        :type disabled: bool|function
        :param default_rules: when truthy, the ConflictSolver and PrivateRemover rules are registered
        :type default_rules: bool
        """
        if callable(disabled):
            self.disabled = disabled
        else:
            # Wrap the constant so that ``self.disabled`` can always be called with a context.
            self.disabled = lambda context: disabled
        self._patterns = []
        self._rules = Rules()
        if default_rules:
            self.rules(ConflictSolver, PrivateRemover)
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}
        self._rebulks = []

    def pattern(self, *pattern):
        """
        Add pattern objects.

        :param pattern: the patterns to register
        :type pattern: rebulk.pattern.Pattern
        :return: self
        :rtype: Rebulk
        """
        self._patterns.extend(pattern)
        return self

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        :param kwargs: the default keyword arguments
        :return: self
        :rtype: Rebulk
        """
        self._defaults = kwargs
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regular expression patterns.

        :param kwargs: the default keyword arguments
        :return: self
        :rtype: Rebulk
        """
        self._regex_defaults = kwargs
        return self

    def regex(self, *pattern, **kwargs):
        """
        Add a regular expression pattern.

        :param pattern: arguments given to ``build_re``
        :param kwargs: keyword arguments given to ``build_re``
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_re(*pattern, **kwargs))

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern, without registering it.

        The regex defaults, then the global defaults, are applied to ``kwargs`` with ``set_defaults``.

        :param pattern: arguments given to the RePattern constructor
        :param kwargs: keyword arguments given to the RePattern constructor
        :return: the new pattern
        :rtype: RePattern
        """
        for defaults in (self._regex_defaults, self._defaults):
            set_defaults(defaults, kwargs)
        return RePattern(*pattern, **kwargs)

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: the default keyword arguments
        :return: self
        :rtype: Rebulk
        """
        self._string_defaults = kwargs
        return self

    def string(self, *pattern, **kwargs):
        """
        Add a string pattern.

        :param pattern: arguments given to ``build_string``
        :param kwargs: keyword arguments given to ``build_string``
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_string(*pattern, **kwargs))

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern, without registering it.

        The string defaults, then the global defaults, are applied to ``kwargs`` with ``set_defaults``.

        :param pattern: arguments given to the StringPattern constructor
        :param kwargs: keyword arguments given to the StringPattern constructor
        :return: the new pattern
        :rtype: StringPattern
        """
        for defaults in (self._string_defaults, self._defaults):
            set_defaults(defaults, kwargs)
        return StringPattern(*pattern, **kwargs)

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: the default keyword arguments
        :return: self
        :rtype: Rebulk
        """
        self._functional_defaults = kwargs
        return self

    def functional(self, *pattern, **kwargs):
        """
        Add a functional pattern.

        :param pattern: arguments given to ``build_functional``
        :param kwargs: keyword arguments given to ``build_functional``
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_functional(*pattern, **kwargs))

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern, without registering it.

        The functional defaults, then the global defaults, are applied to ``kwargs`` with ``set_defaults``.

        :param pattern: arguments given to the FunctionalPattern constructor
        :param kwargs: keyword arguments given to the FunctionalPattern constructor
        :return: the new pattern
        :rtype: FunctionalPattern
        """
        for defaults in (self._functional_defaults, self._defaults):
            set_defaults(defaults, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: the default keyword arguments
        :return: self
        :rtype: Rebulk
        """
        self._chain_defaults = kwargs
        return self

    def chain(self, **kwargs):
        """
        Add a patterns chain, using the configuration of this rebulk.

        Unlike the other registration methods, this one returns the new chain and not this rebulk.

        :param kwargs: keyword arguments given to ``build_chain``
        :return: the new chain
        :rtype: Chain
        """
        new_chain = self.build_chain(**kwargs)
        self._patterns.append(new_chain)
        return new_chain

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain, without registering it.

        The chain defaults, then the global defaults, are applied to ``kwargs`` with ``set_defaults``.

        :param kwargs: keyword arguments given to the Chain constructor
        :return: the new chain
        :rtype: Chain
        """
        for defaults in (self._chain_defaults, self._defaults):
            set_defaults(defaults, kwargs)
        return Chain(self, **kwargs)

    def rules(self, *rules):
        """
        Add rules as a module, class or instance.

        :param rules: the rules to load
        :type rules: list[Rule]
        :return: self
        :rtype: Rebulk
        """
        self._rules.load(*rules)
        return self

    def rebulk(self, *rebulks):
        """
        Add children rebulk objects.

        :param rebulks: the children to register
        :type rebulks: Rebulk
        :return: self
        :rtype: Rebulk
        """
        self._rebulks.extend(rebulks)
        return self

    def matches(self, string, context=None):
        """
        Search for all matches with current configuration against the input string.

        Patterns are searched first, then rules are executed on the matches found.

        :param string: string to search into
        :type string: str
        :param context: context to use, an empty dict when None
        :type context: dict
        :return: A custom list of matches
        :rtype: Matches
        """
        if context is None:
            context = {}
        found = Matches(input_string=string)
        self._matches_patterns(found, context)
        self._execute_rules(found, context)
        return found

    def effective_rules(self, context=None):
        """
        Get effective rules for this rebulk object and its children.

        :param context: context given to the ``disabled`` function of each child
        :return: the rules of this rebulk, followed by the rules of every enabled child
        :rtype: Rules
        """
        effective = Rules()
        effective.extend(self._rules)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._rules)
        return effective

    def _execute_rules(self, matches, context):
        """
        Execute the effective rules of this rebulk and its children, unless this rebulk is disabled.

        :param matches: matches the rules operate on
        :type matches: Matches
        :param context: context to use
        :type context: dict
        """
        if self.disabled(context):
            return
        self.effective_rules(context).execute_all_rules(matches, context)

    def effective_patterns(self, context=None):
        """
        Get effective patterns for this rebulk object and its children.

        :param context: context given to the ``disabled`` function of each child
        :return: the patterns of this rebulk, followed by the patterns of every enabled child
        :rtype: list
        """
        effective = list(self._patterns)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._patterns)
        return effective

    def _matches_patterns(self, matches, context):
        """
        Search for all matches with current patterns against the input string of ``matches``.

        Nothing is searched when this rebulk is disabled. Matches flagged as marker are stored in
        ``matches.markers``, the others in ``matches`` itself.

        :param matches: matches list
        :type matches: Matches
        :param context: context to use
        :type context: dict
        """
        if self.disabled(context):
            return
        for pattern in self.effective_patterns(context):
            if pattern.disabled(context):
                log(pattern.log_level, "Pattern is disabled. (%s)", pattern)
                continue

            pattern_matches = pattern.matches(matches.input_string, context)
            if pattern_matches:
                log(pattern.log_level, "Pattern has %s match(es). (%s)", len(pattern_matches), pattern)

            for match in pattern_matches:
                if match.marker:
                    log(pattern.log_level, "Marker found. (%s)", match)
                    matches.markers.append(match)
                else:
                    log(pattern.log_level, "Match found. (%s)", match)
                    matches.append(match)
|
17
libs/common/rebulk/remodule.py
Normal file
17
libs/common/rebulk/remodule.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Uniform re module
|
||||
"""
|
||||
# pylint: disable-all
|
||||
import os
|
||||
|
||||
# ``re`` is bound to the third party ``regex`` module when it can be imported and is not disabled
# through the REGEX_DISABLED environment variable, to the standard ``re`` module otherwise.
# REGEX_AVAILABLE tells which one was selected.
REGEX_AVAILABLE = False
if os.environ.get('REGEX_DISABLED') not in ["1", "true", "True", "Y"]:
    try:
        import regex as re
    except ImportError:
        import re
    else:
        REGEX_AVAILABLE = True
else:
    import re
|
373
libs/common/rebulk/rules.py
Normal file
373
libs/common/rebulk/rules.py
Normal file
|
@ -0,0 +1,373 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Abstract rule class definition and rule engine implementation
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import inspect
|
||||
from itertools import groupby
|
||||
from logging import getLogger
|
||||
|
||||
import six
|
||||
from .utils import is_iterable
|
||||
|
||||
from .toposort import toposort
|
||||
|
||||
from . import debug
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
class Consequence(object):
    """
    Abstract definition of a consequence, the action applied once a condition has been met.
    """
    @abstractmethod
    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Action implementation.

        :param matches: matches the action operates on
        :type matches: rebulk.match.Matches
        :param when_response: return object from when call.
        :type when_response: object
        :param context: the context
        :type context:
        :return: True if the action was run, False if it wasn't.
        :rtype: bool
        """
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
class Condition(object):
    """
    Abstract definition of a condition, the check deciding whether a consequence is applied.
    """
    @abstractmethod
    def when(self, matches, context):  # pragma: no cover
        """
        Condition implementation.

        :param matches: matches the condition inspects
        :type matches: rebulk.match.Matches
        :param context: the context
        :type context:
        :return: truthy if rule should be triggered and execute then action, falsy if it should not.
        :rtype: object
        """
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
class CustomRule(Condition, Consequence):
    """
    Definition of a rule to apply: a condition (``when``) together with its consequence (``then``).
    """
    # pylint: disable=no-self-use, unused-argument, abstract-method
    priority = 0
    name = None
    dependency = None
    properties = {}

    def __init__(self, log_level=None):
        """
        :param log_level: logging level used for this rule. When None, the ``log_level`` attribute
            already available on the instance or its class is kept, ``debug.LOG_LEVEL`` being used when
            there is none.
        """
        self.defined_at = debug.defined_at()
        if log_level is not None:
            # An explicit level always wins; it used to be silently discarded.
            self.log_level = log_level
        elif not hasattr(self, 'log_level'):
            self.log_level = debug.LOG_LEVEL

    def enabled(self, context):
        """
        Tell whether this rule is enabled for the given context.

        :param context: the context
        :type context:
        :return: True if rule is enabled, False if disabled
        :rtype: bool
        """
        return True

    def __lt__(self, other):
        # Reversed on purpose: sorting rules puts the highest priority first.
        return self.priority > other.priority

    def __repr__(self):
        defined = ""
        if self.defined_at:
            defined = "@%s" % (self.defined_at,)
        return "<%s%s>" % (self.name if self.name else self.__class__.__name__, defined)

    def __eq__(self, other):
        # Two rules are equal when they are instances of the very same class.
        return self.__class__ == other.__class__

    def __hash__(self):
        return hash(self.__class__)
|
||||
|
||||
|
||||
class Rule(CustomRule):
    """
    Rule whose action is delegated to the consequence(s) declared in ``consequence``.
    """
    # pylint:disable=abstract-method
    consequence = None

    def then(self, matches, when_response, context):
        """
        Run the declared consequence(s) with the response returned by ``when``.

        With a single consequence, it receives the whole response. With an iterable of consequences,
        each one receives the next item of the response, a non iterable response being first wrapped
        in a list. Consequences declared as classes are instantiated without argument.

        :param matches: matches the consequences operate on
        :param when_response: return object from when call
        :param context: the context
        """
        assert self.consequence
        if not is_iterable(self.consequence):
            single = self.consequence
            if inspect.isclass(single):
                single = single()  # pylint:disable=not-callable
            single.then(matches, when_response, context)
            return

        if not is_iterable(when_response):
            when_response = [when_response]
        responses = iter(when_response)
        for declared in self.consequence:  # pylint: disable=not-an-iterable
            instance = declared() if inspect.isclass(declared) else declared
            instance.then(matches, next(responses), context)
|
||||
|
||||
|
||||
class RemoveMatch(Consequence):  # pylint: disable=abstract-method
    """
    Remove from matches the match(es) given as when response.
    """
    def then(self, matches, when_response, context):
        """
        :param matches: matches to remove from
        :param when_response: a match or an iterable of matches
        :param context: the context (not used here)
        :return: for an iterable response, the list of matches actually removed; for a single match,
            that match if it was removed, None otherwise
        """
        if not is_iterable(when_response):
            if when_response in matches:
                matches.remove(when_response)
                return when_response
            return None

        removed = []
        for candidate in list(when_response):
            if candidate in matches:
                matches.remove(candidate)
                removed.append(candidate)
        return removed
|
||||
|
||||
|
||||
class AppendMatch(Consequence):  # pylint: disable=abstract-method
    """
    Append to matches the match(es) given as when response.
    """
    def __init__(self, match_name=None):
        """
        :param match_name: when truthy, name given to the appended matches
        """
        self.match_name = match_name

    def then(self, matches, when_response, context):
        """
        :param matches: matches to append to
        :param when_response: a match or an iterable of matches
        :param context: the context (not used here)
        :return: for an iterable response, the list of matches actually appended; for a single match,
            that match if it was appended, None otherwise
        """
        if not is_iterable(when_response):
            # A single match is renamed before membership is tested.
            if self.match_name:
                when_response.name = self.match_name
            if when_response not in matches:
                matches.append(when_response)
                return when_response
            return None

        appended = []
        for candidate in list(when_response):
            if candidate not in matches:
                if self.match_name:
                    candidate.name = self.match_name
                matches.append(candidate)
                appended.append(candidate)
        return appended
|
||||
|
||||
|
||||
class RenameMatch(Consequence):  # pylint: disable=abstract-method
    """
    Rename the match(es) given as when response.
    """
    def __init__(self, match_name):
        """
        :param match_name: the new name of the matches
        """
        self.match_name = match_name
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the given match(es) from matches, rename what was removed and append it back.

        :param matches: matches to update
        :param when_response: a match or an iterable of matches
        :param context: the context
        """
        removed = self.remove.then(matches, when_response, context)
        if is_iterable(removed):
            removed = list(removed)
            renamed = removed
        elif removed:
            renamed = [removed]
        else:
            renamed = []
        for match in renamed:
            match.name = self.match_name
        if removed:
            self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class AppendTags(Consequence):  # pylint: disable=abstract-method
    """
    Add tags to the match(es) given as when response.
    """
    def __init__(self, tags):
        """
        :param tags: the tags to add
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the given match(es) from matches, extend the tags of what was removed and append it back.

        :param matches: matches to update
        :param when_response: a match or an iterable of matches
        :param context: the context
        """
        removed = self.remove.then(matches, when_response, context)
        if is_iterable(removed):
            removed = list(removed)
            tagged = removed
        elif removed:
            tagged = [removed]
        else:
            tagged = []
        for match in tagged:
            match.tags.extend(self.tags)
        if removed:
            self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class RemoveTags(Consequence):  # pylint: disable=abstract-method
    """
    Remove tags from the match(es) given as when response.
    """
    def __init__(self, tags):
        """
        :param tags: the tags to remove
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the given match(es) from matches, drop the configured tags from what was removed and
        append it back. Each configured tag is removed at most once per match.

        :param matches: matches to update
        :param when_response: a match or an iterable of matches
        :param context: the context
        """
        removed = self.remove.then(matches, when_response, context)
        if is_iterable(removed):
            removed = list(removed)
            untagged = removed
        elif removed:
            untagged = [removed]
        else:
            untagged = []
        for match in untagged:
            for tag in self.tags:
                if tag in match.tags:
                    match.tags.remove(tag)
        if removed:
            self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class Rules(list):
    """
    list of rules ready to execute.
    """

    def __init__(self, *rules):
        """
        :param rules: rule modules, classes or instances to load
        """
        super(Rules, self).__init__()
        self.load(*rules)

    def load(self, *rules):
        """
        Load rules from Rule modules, classes or instances.

        Modules are given to ``load_module``, classes to ``load_class``; any other object is appended
        as is.

        :param rules: rule modules, classes or instances to load
        """
        for rule in rules:
            if inspect.ismodule(rule):
                self.load_module(rule)
            elif inspect.isclass(rule):
                self.load_class(rule)
            else:
                self.append(rule)

    def load_module(self, module):
        """
        Load every class defined in a rules module.

        Only classes whose ``__module__`` is the name of the given module are loaded, which leaves out
        the classes it merely imports. Other module members, like functions, are ignored.

        :param module: the module to load classes from
        """
        def is_local_class(member):
            # inspect.isclass has to be *called*: the bare function object is always truthy, which used
            # to let every module-local function through and get it called by load_class.
            return inspect.isclass(member) and getattr(member, '__module__', None) == module.__name__

        for _, obj in inspect.getmembers(module, is_local_class):
            self.load_class(obj)

    def load_class(self, class_):
        """
        Load a Rule class: it is instantiated without argument and the instance is appended.

        :param class_: the class to instantiate
        """
        self.append(class_())

    def execute_all_rules(self, matches, context):
        """
        Execute all rules from this rules list. Rules are grouped by priority, then each priority group
        is split by ``toposort_rules`` into groups that are executed one after the other.

        :param matches: matches the rules operate on
        :param context: the context
        :return: a ``(rule, when_response)`` tuple for every rule whose execution returned something
            else than None
        :rtype: list
        """
        ret = []
        for priority, priority_rules in groupby(sorted(self), lambda rule: rule.priority):
            sorted_rules = toposort_rules(list(priority_rules))  # Group by dependency graph toposort
            for rules_group in sorted_rules:
                rules_group = sorted(rules_group, key=self.index)  # Sort rules group based on initial ordering.
                # The whole group is announced at the highest log level found among its rules.
                group_log_level = None
                for rule in rules_group:
                    if group_log_level is None or group_log_level < rule.log_level:
                        group_log_level = rule.log_level
                log(group_log_level, "%s independent rule(s) at priority %s.", len(rules_group), priority)
                for rule in rules_group:
                    when_response = execute_rule(rule, matches, context)
                    if when_response is not None:
                        ret.append((rule, when_response))

        return ret
|
||||
|
||||
|
||||
def execute_rule(rule, matches, context):
    """
    Execute the given rule.

    A disabled rule is only logged. Otherwise ``rule.when`` is evaluated and, when its result is
    truthy, ``rule.then`` is called with that result.

    :param rule: rule to execute
    :param matches: matches forwarded to ``when`` and ``then``
    :param context: context forwarded to ``enabled``, ``when`` and ``then``
    :return: the truthy ``when`` response, or None when the rule is disabled or did not trigger
    """
    if not rule.enabled(context):
        log(rule.log_level, "Rule is disabled: %s", rule)
        return None

    log(rule.log_level, "Checking rule condition: %s", rule)
    when_response = rule.when(matches, context)
    if not when_response:
        return None

    log(rule.log_level, "Rule was triggered: %s", when_response)
    log(rule.log_level, "Running rule consequence: %s %s", rule, when_response)
    rule.then(matches, when_response, context)
    return when_response
|
||||
|
||||
def toposort_rules(rules):
    """
    Sort given rules using toposort with dependency parameter.

    Each rule's ``dependency`` may be a single value or an iterable of values. Dependencies given
    as classes are replaced by the instance of that class found in ``rules``; classes with no
    instance in ``rules`` and falsy entries are dropped.

    :param rules: rules to sort; at most one instance per class
    :return: result of ``toposort`` on the rule -> dependencies graph
    :raise ValueError: when two rules share the same class
    """
    by_class = {}
    for rule in rules:
        if rule.__class__ in by_class:
            raise ValueError("Duplicate class rules are not allowed: %s" % rule.__class__)
        by_class[rule.__class__] = rule

    graph = {}
    for rule in rules:
        declared = rule.dependency
        if not is_iterable(declared) and declared:
            declared = [declared]

        resolved = set()
        for dependency in declared or ():
            if inspect.isclass(dependency):
                dependency = by_class.get(dependency)
            if dependency:
                resolved.add(dependency)
        graph[rule] = resolved
    return toposort(graph)
|
3
libs/common/rebulk/test/__init__.py
Normal file
3
libs/common/rebulk/test/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring
|
79
libs/common/rebulk/test/default_rules_module.py
Normal file
79
libs/common/rebulk/test/default_rules_module.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from ..match import Match
|
||||
from ..rules import Rule, RemoveMatch, AppendMatch, RenameMatch, AppendTags, RemoveTags
|
||||
|
||||
|
||||
class RuleRemove0(Rule):
    """Fixture: consequence is the ``RemoveMatch`` class itself; ``when`` returns the first match."""
    consequence = RemoveMatch

    def when(self, matches, context):
        first = matches[0]
        return first
|
||||
|
||||
|
||||
class RuleAppend0(Rule):
    """Fixture: consequence is an ``AppendMatch`` instance; ``when`` returns a single new match."""
    consequence = AppendMatch()

    def when(self, matches, context):
        created = Match(5, 10)
        return created
|
||||
|
||||
class RuleRename0(Rule):
    """Fixture: consequence is a list holding a ``RenameMatch``; ``when`` returns a one-item list."""
    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        original = Match(5, 10, name="original")
        return [original]
|
||||
|
||||
class RuleRemove1(Rule):
    """Fixture: consequence is a list holding a ``RemoveMatch`` instance; ``when`` returns a one-item list."""
    consequence = [RemoveMatch()]

    def when(self, matches, context):
        first = matches[0]
        return [first]
|
||||
|
||||
class RuleAppend1(Rule):
    """Fixture: consequence is a list holding the ``AppendMatch`` class; ``when`` returns a one-item list."""
    consequence = [AppendMatch]

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename1(Rule):
    """Fixture: consequence is a ``RenameMatch`` instance; ``when`` returns a one-item list."""
    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        original = Match(5, 10, name="original")
        return [original]
|
||||
|
||||
class RuleAppend2(Rule):
    """Fixture: list consequence with a named ``AppendMatch`` and declared ``properties``."""
    consequence = [AppendMatch('renamed')]
    properties = {'renamed': [None]}

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename2(Rule):
    """Fixture: consequence is a ``RenameMatch`` instance; ``when`` returns a single match."""
    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        original = Match(5, 10, name="original")
        return original
|
||||
|
||||
class RuleAppend3(Rule):
    """Fixture: named ``AppendMatch`` instance as consequence, with declared ``properties``."""
    consequence = AppendMatch('renamed')
    properties = {'renamed': [None]}

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename3(Rule):
    """Fixture: consequence is a list holding a ``RenameMatch``; ``when`` returns a single match."""
    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        original = Match(5, 10, name="original")
        return original
|
||||
|
||||
class RuleAppendTags0(Rule):
    """Fixture: ``AppendTags(['new-tag'])`` on the result of ``matches.named('tags', 0)``."""
    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags', 0)
        return selected
|
||||
|
||||
class RuleRemoveTags0(Rule):
    """Fixture: ``RemoveTags(['new-tag'])`` on the result of ``matches.named('tags', 0)``."""
    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags', 0)
        return selected
|
||||
|
||||
class RuleAppendTags1(Rule):
    """Fixture: ``AppendTags(['new-tag'])`` on the result of ``matches.named('tags')``."""
    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags')
        return selected
|
||||
|
||||
class RuleRemoveTags1(Rule):
    """Fixture: ``RemoveTags(['new-tag'])`` on the result of ``matches.named('tags')``."""
    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags')
        return selected
|
38
libs/common/rebulk/test/rebulk_rules_module.py
Normal file
38
libs/common/rebulk/test/rebulk_rules_module.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from rebulk.rules import Rule, RemoveMatch, CustomRule
|
||||
|
||||
|
||||
class RemoveAllButLastYear(Rule):
    """Fixture: ``when`` returns every 'year' match except the last one."""
    consequence = RemoveMatch

    def when(self, matches, context):
        years = matches.named('year')
        return years[:-1]
|
||||
|
||||
|
||||
class PrefixedSuffixedYear(CustomRule):
    """Fixture: removes 'year' matches that have neither a 'yearPrefix' before nor a 'yearSuffix' after."""

    def when(self, matches, context):
        # A year is kept only when a filtered previous() or next() lookup finds something.
        return [
            year for year in matches.named('year')
            if not matches.previous(year, lambda p: p.name == 'yearPrefix')
            and not matches.next(year, lambda n: n.name == 'yearSuffix')
        ]

    def then(self, matches, when_response, context):
        for unwanted in when_response:
            matches.remove(unwanted)
|
||||
|
||||
|
||||
class PrefixedSuffixedYearNoLambda(Rule):
    """Fixture: same selection as the lambda variant, filtering previous()/next() results by name."""
    consequence = RemoveMatch

    def when(self, matches, context):
        unwanted = []
        for year in matches.named('year'):
            prefixes = [m for m in matches.previous(year) if m.name == 'yearPrefix']
            if prefixes:
                continue
            suffixes = [m for m in matches.next(year) if m.name == 'yearSuffix']
            if suffixes:
                continue
            unwanted.append(year)
        return unwanted
|
54
libs/common/rebulk/test/rules_module.py
Normal file
54
libs/common/rebulk/test/rules_module.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from ..match import Match
|
||||
from ..rules import Rule
|
||||
|
||||
|
||||
class Rule3(Rule):
    """Fixture: triggers according to ``context['when']`` and appends ``Match(3, 4)``."""

    def when(self, matches, context):
        return context.get('when')

    def then(self, matches, when_response, context):
        assert when_response in (True, False)
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule2(Rule):
    """Fixture: always triggers, declares ``Rule3`` as dependency and appends ``Match(3, 4)``."""
    dependency = Rule3

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule1(Rule):
    """Fixture: always triggers, declares ``Rule2`` as dependency and clears all matches."""
    dependency = Rule2

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        matches.clear()
|
||||
|
||||
|
||||
class Rule0(Rule):
    """Fixture: always triggers, declares ``Rule1`` as dependency and appends ``Match(3, 4)``."""
    dependency = Rule1

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule1Disabled(Rule1):
    """Fixture: ``Rule1`` variant whose ``enabled`` always returns False."""
    name = "Disabled Rule1"

    def enabled(self, context):
        return False
|
411
libs/common/rebulk/test/test_chain.py
Normal file
411
libs/common/rebulk/test/test_chain.py
Normal file
|
@ -0,0 +1,411 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
import re
|
||||
|
||||
from functools import partial
|
||||
|
||||
from ..validators import chars_surround
|
||||
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
|
||||
|
||||
|
||||
def test_chain_close():
    """Closing a chain returns the owning Rebulk, which then has one effective pattern."""
    owner = Rebulk()
    closed = owner.chain().close()

    assert closed == owner
    assert len(owner.effective_patterns()) == 1
|
||||
|
||||
|
||||
def test_build_chain():
    """Check the pattern type and repeater bounds of every part built by the chain builder."""
    rebulk = Rebulk()
    needle = "1849"

    def digit(input_string):
        start = input_string.find(needle)
        if start > -1:
            return start, start + len(needle)
        return None

    closed = rebulk.chain() \
        .functional(digit) \
        .string("test").repeater(2) \
        .string("x").repeater('{1,3}') \
        .string("optional").repeater('?') \
        .regex("f?x").repeater('+') \
        .close()

    assert closed == rebulk
    assert len(rebulk.effective_patterns()) == 1

    chain = rebulk.effective_patterns()[0]

    # (pattern type, repeater_start, repeater_end) for each part, in declaration order.
    expected_parts = [
        (FunctionalPattern, 1, 1),
        (StringPattern, 2, 2),
        (StringPattern, 1, 3),
        (StringPattern, 0, 1),
        (RePattern, 1, None),
    ]
    assert len(chain.parts) == 5

    for index, (pattern_type, start, end) in enumerate(expected_parts):
        part = chain.parts[index]
        assert isinstance(part.pattern, pattern_type)
        assert part.repeater_start == start
        if end is None:
            assert part.repeater_end is None
        else:
            assert part.repeater_end == end
|
||||
|
||||
|
||||
def test_chain_defaults():
|
||||
rebulk = Rebulk()
|
||||
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
|
||||
|
||||
rebulk.chain()\
|
||||
.regex("(?P<test>test)") \
|
||||
.regex(" ").repeater("*") \
|
||||
.regex("(?P<testIgnore>testIgnore)")
|
||||
matches = rebulk.matches("test testIgnore")
|
||||
|
||||
assert len(matches) == 1
|
||||
assert matches[0].name == "test"
|
||||
|
||||
|
||||
def test_matches():
    """Match a five-part chain against accepted and rejected inputs."""
    rebulk = Rebulk()
    needle = "1849"

    def digit(input_string):
        start = input_string.find(needle)
        if start > -1:
            return start, start + len(needle)
        return None

    chain = rebulk.chain() \
        .functional(digit) \
        .string("test").hidden().repeater(2) \
        .string("x").hidden().repeater('{1,3}') \
        .string("optional").hidden().repeater('?') \
        .regex("f.?x", name='result').repeater('+') \
        .close()

    matches = chain.matches("1849testtestxxfixfux_foxabc1849testtestxoptionalfoxabc")
    assert len(matches) == 2

    first_children = matches[0].children
    for index, value in enumerate(['1849', 'fix', 'fux']):
        assert first_children[index].value == value

    second_children = matches[1].children
    for index, value in enumerate(['1849', 'fox']):
        assert second_children[index].value == value

    # Each of these inputs breaks one part of the chain and must produce no match.
    rejected_inputs = (
        "_1850testtestxoptionalfoxabc",
        "_1849testtesttesttestxoptionalfoxabc",
        "_1849testtestxxxxoptionalfoxabc",
        "_1849testtestoptionalfoxabc",
        "_1849testtestxoptionalabc",
    )
    for input_string in rejected_inputs:
        matches = chain.matches(input_string)
        assert len(matches) == 0

    matches = chain.matches("_1849testtestxoptionalfaxabc")
    assert len(matches) == 1

    children = matches[0].children
    for index, value in enumerate(['1849', 'fax']):
        assert children[index].value == value
|
||||
|
||||
|
||||
def test_matches_2():
    """Episode/version chain with int formatters, returning children only."""
    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .chain(children=True, formatter={'episode': int}) \
        .defaults(formatter={'version': int}) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
        .close()

    matches = rebulk.matches("This is E14v2-15E16x17")
    assert len(matches) == 5

    expected = [('episode', 14), ('version', 2), ('episode', 15), ('episode', 16), ('episode', 17)]
    for index, (name, value) in enumerate(expected):
        assert matches[index].name == name
        assert matches[index].value == value
|
||||
|
||||
|
||||
def test_matches_3():
    """Three season/episode chains sharing the options given to the first ``chain()`` call."""
    alt_dash = (r'@', r'[\W_]')  # abbreviation

    rebulk = Rebulk()

    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 private_names=['episodeSeparator', 'seasonSeparator'],
                 children=True,
                 private_parent=True,
                 conflict_solver=lambda match, other: match
                 if match.name in ['season', 'episode'] and other.name in
                 ['screen_size', 'video_codec', 'audio_codec',
                  'audio_channels', 'container', 'date']
                 else '__default__') \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')

    # input string -> expected (name, value) of each returned match, in order.
    expectations = [
        ("test-01x02-03", [('season', 1), ('episode', 2), ('episode', 3)]),
        ("test-S01E02-03", [('season', 1), ('episode', 2), ('episode', 3)]),
        ("test-S01-02-03-04", [('season', 1), ('season', 2), ('season', 3), ('season', 4)]),
    ]
    for input_string, expected in expectations:
        matches = rebulk.matches(input_string)
        assert len(matches) == len(expected)
        for index, (name, value) in enumerate(expected):
            assert matches[index].name == name
            assert matches[index].value == value
|
||||
|
||||
|
||||
def test_matches_4():
    """Chain-level ``defaults(validator=None)`` with a parent validator set in the Rebulk defaults."""
    seps_surround = partial(chars_surround, " ")

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    matches = rebulk.matches("Some Series E01E02E03")
    assert len(matches) == 3

    for index, episode in enumerate([1, 2, 3]):
        assert matches[index].value == episode
|
||||
|
||||
|
||||
def test_matches_5():
    """Bounded ``{2,3}`` repeater combined with a parent validator."""
    seps_surround = partial(chars_surround, " ")

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

    expected_counts = [
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        ("Some Series E01E02E03E04E05E06", 0),  # Parent can't be validated, so no results at all
    ]
    for input_string, count in expected_counts:
        matches = rebulk.matches(input_string)
        assert len(matches) == count
|
||||
|
||||
|
||||
def test_matches_6():
    """Bounded ``{2,3}`` repeater without any parent validator."""
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator=None, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

    expected_counts = [
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        ("Some Series E01E02E03E04E05E06", 4),  # No validator on parent, so it should give 4 episodes.
    ]
    for input_string, count in expected_counts:
        matches = rebulk.matches(input_string)
        assert len(matches) == count
|
||||
|
||||
|
||||
def test_matches_7():
    """
    Season chain whose first part validates the parent match with ``seps_surround``.

    The match values are asserted; the previous version assigned to ``matches[i].value`` instead
    of comparing it, so the values were never checked.
    """
    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, private_parent=True)

    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    # input string -> expected season numbers, in order.
    expectations = [
        ("Some S01", [1]),
        ("Some S01-02", [1, 2]),
        ("programs4/Some S01-02", [1, 2]),
        ("programs4/SomeS01middle.S02-03.andS04here", [2, 3]),
        ("Some 02.and.S04-05.here", [4, 5]),
    ]
    for input_string, seasons in expectations:
        matches = rebulk.matches(input_string)
        assert len(matches) == len(seasons)
        # No formatter is configured here, so compare numerically instead of assuming the value type.
        assert [int(match.value) for match in matches] == seasons
|
||||
|
||||
|
||||
def test_chain_breaker():
    """
    ``chain_breaker`` passed to ``chain()`` stops the chain when two consecutive seasons differ by more than 10.

    The match values are asserted; the previous version assigned to ``matches[i].value`` instead
    of comparing it.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(chain_breaker=chain_breaker). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # The 'season' formatter is int, which chain_breaker's subtraction also relies on.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
||||
|
||||
|
||||
def test_chain_breaker_defaults():
    """
    Same scenario as ``test_chain_breaker`` with ``chain_breaker`` supplied through ``rebulk.defaults``.

    The match values are asserted; the previous version assigned to ``matches[i].value`` instead
    of comparing it.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(chain_breaker=chain_breaker, children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # The 'season' formatter is int, which chain_breaker's subtraction also relies on.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
||||
|
||||
|
||||
def test_chain_breaker_defaults2():
    """
    Same scenario as ``test_chain_breaker`` with ``chain_breaker`` supplied through ``rebulk.chain_defaults``.

    The match values are asserted; the previous version assigned to ``matches[i].value`` instead
    of comparing it.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.chain_defaults(chain_breaker=chain_breaker)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # The 'season' formatter is int, which chain_breaker's subtraction also relies on.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
83
libs/common/rebulk/test/test_debug.py
Normal file
83
libs/common/rebulk/test/test_debug.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
|
||||
|
||||
from ..pattern import StringPattern
|
||||
from ..rebulk import Rebulk
|
||||
from ..match import Match
|
||||
from .. import debug
|
||||
from .default_rules_module import RuleRemove0
|
||||
|
||||
|
||||
class TestDebug(object):
    # NOTE: the tests in this class assert hard-coded ``defined_at`` line
    # numbers (20, 22, 23 and 26-28) for the objects created below, so the
    # number of lines separating these statements must be preserved.
    #
    #request.addfinalizer(disable_debug)

    debug.DEBUG = True
    pattern = StringPattern(1, 3, value="es")

    match = Match(1, 3, value="es")
    rule = RuleRemove0()

    input_string = "This is a debug test"
    rebulk = Rebulk().string("debug") \
        .string("is")

    matches = rebulk.matches(input_string)
    debug.DEBUG = False

    @classmethod
    def setup_class(cls):
        debug.DEBUG = True

    @classmethod
    def teardown_class(cls):
        debug.DEBUG = False

    def test_pattern(self):
        origin = self.pattern.defined_at
        assert origin.lineno == 20
        assert origin.name == 'rebulk.test.test_debug'
        assert origin.filename.endswith('test_debug.py')

        assert str(origin) == 'test_debug.py#L20'
        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'

    def test_match(self):
        origin = self.match.defined_at
        assert origin.lineno == 22
        assert origin.name == 'rebulk.test.test_debug'
        assert origin.filename.endswith('test_debug.py')

        assert str(origin) == 'test_debug.py#L22'

    def test_rule(self):
        origin = self.rule.defined_at
        assert origin.lineno == 23
        assert origin.name == 'rebulk.test.test_debug'
        assert origin.filename.endswith('test_debug.py')

        assert str(origin) == 'test_debug.py#L23'
        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'

    def test_rebulk(self):
        """
        This test fails on travis CI, can't find out why there's 1 line offset ...
        """
        first, second = self.rebulk._patterns[0], self.rebulk._patterns[1]

        # Two line numbers are accepted for each pattern because of the offset described above.
        assert first.defined_at.lineno in [26, 27]
        assert first.defined_at.name == 'rebulk.test.test_debug'
        assert first.defined_at.filename.endswith('test_debug.py')

        assert str(first.defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']

        assert second.defined_at.lineno in [27, 28]
        assert second.defined_at.name == 'rebulk.test.test_debug'
        assert second.defined_at.filename.endswith('test_debug.py')

        assert str(second.defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']

        assert self.matches[0].defined_at == first.defined_at
        assert self.matches[1].defined_at == second.defined_at

    def test_repr(self):
        str(self.matches)
|
138
libs/common/rebulk/test/test_introspector.py
Normal file
138
libs/common/rebulk/test/test_introspector.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Introspector tests
|
||||
"""
|
||||
# pylint: disable=no-self-use,pointless-statement,missing-docstring,protected-access,invalid-name,len-as-condition
|
||||
from ..rebulk import Rebulk
|
||||
from .. import introspector
|
||||
from .default_rules_module import RuleAppend2, RuleAppend3
|
||||
|
||||
|
||||
def test_string_introspector():
    """Introspect two named string patterns and compare per-pattern and merged properties."""
    rebulk = Rebulk().string('One', 'Two', 'Three', name='first').string('1', '2', '3', name='second')

    introspected = introspector.introspect(rebulk, None)
    described = introspected.patterns

    assert len(described) == 2

    first_properties = described[0].properties
    assert len(first_properties) == 1
    # NOTE(review): bare comparison, evaluated but not asserted.
    first_properties['first'] == ['One', 'Two', 'Three']

    second_properties = described[1].properties
    assert len(second_properties) == 1
    # NOTE(review): bare comparison, evaluated but not asserted.
    second_properties['second'] == ['1', '2', '3']

    properties = introspected.properties
    assert len(properties) == 2
    assert properties['first'] == first_properties['first']
    assert properties['second'] == second_properties['second']
|
||||
|
||||
|
||||
def test_string_properties():
    """Introspect string patterns that declare explicit ``properties``."""
    rebulk = Rebulk()\
        .string('One', 'Two', 'Three', name='first', properties={'custom': ['One']})\
        .string('1', '2', '3', name='second', properties={'custom': [1]})

    introspected = introspector.introspect(rebulk, None)
    described = introspected.patterns

    assert len(described) == 2
    assert len(introspected.rules) == 2

    first_properties = described[0].properties
    assert len(first_properties) == 1
    # NOTE(review): bare comparison, evaluated but not asserted.
    first_properties['custom'] == ['One']

    second_properties = described[1].properties
    assert len(second_properties) == 1
    # NOTE(review): bare comparison, evaluated but not asserted.
    second_properties['custom'] == [1]

    properties = introspected.properties
    assert len(properties) == 1
    assert properties['custom'] == ['One', 1]
|
||||
|
||||
|
||||
def test_various_pattern():
    """Introspect a mix of regex, string and functional patterns."""
    rebulk = Rebulk()\
        .regex('One', 'Two', 'Three', name='first', value="string") \
        .string('1', '2', '3', name='second', value="digit") \
        .string('4', '5', '6', name='third') \
        .string('private', private=True) \
        .functional(lambda string: (0, 5), name='func', value='test') \
        .regex('One', 'Two', 'Three', name='regex_name') \
        .regex('(?P<one>One)(?P<two>Two)(?P<three>Three)') \
        .functional(lambda string: (6, 10), name='func2') \
        .string('7', name='third')

    introspected = introspector.introspect(rebulk, None)
    described = introspected.patterns

    # Nine patterns are declared above; eight are expected in the introspection result.
    assert len(described) == 8
    assert len(introspected.rules) == 2

    # NOTE(review): the bare ``==`` comparisons below are evaluated but not asserted.
    first_properties = described[0].properties
    assert len(first_properties) == 1
    first_properties['first'] == ['string']

    second_properties = described[1].properties
    assert len(second_properties) == 1
    second_properties['second'] == ['digit']

    third_properties = described[2].properties
    assert len(third_properties) == 1
    third_properties['third'] == ['4', '5', '6']

    func_properties = described[3].properties
    assert len(func_properties) == 1
    func_properties['func'] == ['test']

    regex_name_properties = described[4].properties
    assert len(regex_name_properties) == 1
    regex_name_properties['regex_name'] == [None]

    regex_groups_properties = described[5].properties
    assert len(regex_groups_properties) == 3
    regex_groups_properties['one'] == [None]
    regex_groups_properties['two'] == [None]
    regex_groups_properties['three'] == [None]

    func2_properties = described[6].properties
    assert len(func2_properties) == 1
    func2_properties['func2'] == [None]

    append_third_properties = described[7].properties
    assert len(append_third_properties) == 1
    append_third_properties['third'] == [None]

    properties = introspected.properties
    assert len(properties) == 9
    assert properties['first'] == first_properties['first']
    assert properties['second'] == second_properties['second']
    assert properties['third'] == third_properties['third'] + append_third_properties['third']
    assert properties['func'] == func_properties['func']
    assert properties['regex_name'] == regex_name_properties['regex_name']
    assert properties['one'] == regex_groups_properties['one']
    assert properties['two'] == regex_groups_properties['two']
    assert properties['three'] == regex_groups_properties['three']
    assert properties['func2'] == func2_properties['func2']
|
||||
|
||||
|
||||
def test_rule_properties():
    """
    Check the properties introspected from a Rebulk object configured with two rules.
    """
    rebulk = Rebulk(default_rules=False).rules(RuleAppend2, RuleAppend3)
    report = introspector.introspect(rebulk, None)

    assert len(report.rules) == 2
    assert len(report.patterns) == 0

    # Each of the two rules is expected to expose a single 'renamed' property.
    for position in (0, 1):
        per_rule = report.rules[position].properties
        assert len(per_rule) == 1
        assert per_rule['renamed'] == [None]

    # The aggregated view is expected to hold that same single property.
    merged = report.properties
    assert len(merged) == 1
    assert merged['renamed'] == [None]
|
83
libs/common/rebulk/test/test_loose.py
Normal file
83
libs/common/rebulk/test/test_loose.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
|
||||
from ..loose import call
|
||||
|
||||
|
||||
def test_loose_function():
    """
    Compare loose calls of a function having default arguments with direct calls.
    """
    def func(v1, v2, v3=3, v4=4):
        return sum((v1, v2, v3, v4))

    exact_arguments = func(1, 2, 3, 4)

    assert call(func, 1, 2) == func(1, 2)
    assert call(func, 1, 2, 3, 5) == func(1, 2, 3, 5)
    assert call(func, 1, 2, v3=4, v4=5) == func(1, 2, v3=4, v4=5)
    # A surplus positional or keyword argument must give the same result as a call without it.
    assert call(func, 1, 2, 3, 4, 5) == exact_arguments
    assert call(func, 1, 2, 3, 4, more=5) == exact_arguments
|
||||
|
||||
|
||||
def test_loose_varargs_function():
    """
    Compare loose calls of a function accepting ``*args`` with direct calls.
    """
    def func(v1, v2, *args):
        # Each optional value gets its own fallback. Conditional expressions bind
        # more loosely than ``+``, so writing the whole sum as one unparenthesised
        # expression would collapse it into a single nested conditional.
        v3 = args[0] if len(args) > 0 else 3
        v4 = args[1] if len(args) > 1 else 4
        return v1 + v2 + v3 + v4

    assert call(func, 1, 2) == func(1, 2)
    assert call(func, 1, 2, 3, 5) == func(1, 2, 3, 5)
    assert call(func, 1, 2, 3, 4, 5) == func(1, 2, 3, 4)
|
||||
|
||||
|
||||
def test_loose_kwargs_function():
    """
    Compare loose calls of a function accepting ``**kwargs`` with direct calls.
    """
    def func(v1, v2, **kwargs):
        v3 = kwargs.get('v3', 3)
        v4 = kwargs.get('v4', 4)
        return v1 + v2 + v3 + v4

    keyword_sets = (
        {'v1': 1, 'v2': 2},
        {'v1': 1, 'v2': 2, 'v3': 3, 'v4': 5},
    )
    for keywords in keyword_sets:
        assert call(func, **keywords) == func(**keywords)
|
||||
|
||||
|
||||
def test_loose_class():
    """
    Compare loose instantiation of a class having default arguments with direct instantiation.
    """
    class Dummy(object):
        def __init__(self, v1, v2, v3=3, v4=4):
            self.v1, self.v2, self.v3, self.v4 = v1, v2, v3, v4

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    exact_arguments = Dummy(1, 2, 3, 4).call()

    assert call(Dummy, 1, 2).call() == Dummy(1, 2).call()
    assert call(Dummy, 1, 2, 3, 5).call() == Dummy(1, 2, 3, 5).call()
    assert call(Dummy, 1, 2, v3=4, v4=5).call() == Dummy(1, 2, v3=4, v4=5).call()
    # A surplus positional or keyword argument must give the same result as a call without it.
    assert call(Dummy, 1, 2, 3, 4, 5).call() == exact_arguments
    assert call(Dummy, 1, 2, 3, 4, more=5).call() == exact_arguments
|
||||
|
||||
|
||||
def test_loose_varargs_class():
    """
    Compare loose instantiation of a class accepting ``*args`` with direct instantiation.
    """
    class Dummy(object):
        def __init__(self, v1, v2, *args):
            self.v1 = v1
            self.v2 = v2
            # Fall back to 3 and 4 when the optional positional values are missing.
            self.v3 = 3 if len(args) < 1 else args[0]
            self.v4 = 4 if len(args) < 2 else args[1]

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    assert call(Dummy, 1, 2).call() == Dummy(1, 2).call()
    assert call(Dummy, 1, 2, 3, 5).call() == Dummy(1, 2, 3, 5).call()
    assert call(Dummy, 1, 2, 3, 4, 5).call() == Dummy(1, 2, 3, 4).call()
|
||||
|
||||
|
||||
def test_loose_kwargs_class():
    """
    Compare loose instantiation of a class accepting ``**kwargs`` with direct instantiation.
    """
    class Dummy(object):
        def __init__(self, v1, v2, **kwargs):
            self.v1, self.v2 = v1, v2
            self.v3 = kwargs.get('v3', 3)
            self.v4 = kwargs.get('v4', 4)

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    keyword_sets = (
        {'v1': 1, 'v2': 2},
        {'v1': 1, 'v2': 2, 'v3': 3, 'v4': 5},
    )
    for keywords in keyword_sets:
        assert call(Dummy, **keywords).call() == Dummy(**keywords).call()
|
568
libs/common/rebulk/test/test_match.py
Normal file
568
libs/common/rebulk/test/test_match.py
Normal file
|
@ -0,0 +1,568 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition
|
||||
|
||||
import pytest
|
||||
import six
|
||||
|
||||
from ..match import Match, Matches
|
||||
from ..pattern import StringPattern, RePattern
|
||||
from ..formatters import formatters
|
||||
|
||||
|
||||
class TestMatchClass(object):
    """
    Tests for the behaviour of a single Match object
    """

    def test_repr(self):
        plain = Match(1, 3, value="es")
        assert repr(plain) == '<es:(1, 3)>'

        detailed = Match(0, 4, value="test", private=True, name="abc", tags=['one', 'two'])
        assert repr(detailed) == '<test:(0, 4)+private+name=abc+tags=[\'one\', \'two\']>'

    def test_names(self):
        parent = Match(0, 10, name="test")
        for child_name in ("child1", "child2"):
            parent.children.append(Match(0, 10, name=child_name, parent=parent))

        assert set(parent.names) == {"child1", "child2"}

    def test_equality(self):
        first, second = Match(1, 3, value="es"), Match(1, 3, value="es")
        unrelated = object()

        assert hash(first) == hash(second)
        assert hash(first) != hash(unrelated)

        assert first == second
        # Deliberately spelled with "not ... ==" so that the equality operator is exercised.
        assert not first == unrelated

    def test_inequality(self):
        reference = Match(0, 2, value="te")
        other_span = Match(2, 4, value="st")
        other_value = Match(0, 2, value="other")
        unrelated = object()

        for different in (other_span, other_value):
            assert hash(reference) != hash(different)

        assert reference != unrelated
        assert reference != other_span
        assert reference != other_value

    def test_length(self):
        # The length comes from the span, not from the value.
        same_as_value = Match(0, 4, value="test")
        shorter_than_value = Match(0, 2, value="spanIsUsed")

        assert len(same_as_value) == 4
        assert len(shorter_than_value) == 2

    def test_compare(self):
        earlier = Match(0, 2, value="te")
        later = Match(2, 4, value="st")
        unrelated = object()

        assert earlier < later
        assert earlier <= later

        assert later > earlier
        assert later >= earlier

        if not six.PY3:
            # Python 2 orders arbitrary objects instead of raising.
            assert earlier < unrelated
            assert earlier <= unrelated
            assert not earlier > unrelated
            assert not earlier >= unrelated
            return

        comparisons = (
            lambda: earlier < unrelated,
            lambda: earlier <= unrelated,
            lambda: earlier > unrelated,
            lambda: earlier >= unrelated,
        )
        for comparison in comparisons:
            with pytest.raises(TypeError):
                comparison()

    def test_value(self):
        subject = Match(1, 3)
        subject.value = "test"

        assert subject.value == "test"
|
||||
|
||||
|
||||
class TestMatchesClass(object):
    """
    Tests for the Matches container, built from four shared Match fixtures
    """
    match1 = Match(0, 2, value="te", name="start")
    match2 = Match(2, 3, value="s", tags="tag1")
    match3 = Match(3, 4, value="t", tags=["tag1", "tag2"])
    match4 = Match(2, 4, value="st", name="end")

    def test_tag(self):
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        for name in ("start", "end"):
            assert name in matches.names

        for tag in ("tag1", "tag2"):
            assert tag in matches.tags

        with_tag1 = matches.tagged("tag1")
        assert len(with_tag1) == 2
        assert with_tag1[0] == self.match2
        assert with_tag1[1] == self.match3

        with_tag2 = matches.tagged("tag2")
        assert len(with_tag2) == 1
        assert with_tag2[0] == self.match3

        named_start = matches.named("start")
        assert len(named_start) == 1
        assert named_start[0] == self.match1

        named_end = matches.named("end")
        assert len(named_end) == 1
        assert named_end[0] == self.match4

    def test_base(self):
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4

        matches = Matches()
        matches.append(first)

        assert len(matches) == 1
        assert repr(matches) == repr([first])
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]

        for fixture in (second, third, fourth):
            matches.append(fixture)

        assert len(matches) == 4
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]

        range_cases = (
            ((), [first, second, fourth, third]),
            ((0,), [first, second, fourth, third]),
            ((0, 3), [first, second, fourth]),
            ((2, 3), [second, fourth]),
            ((3, 4), [fourth, third]),
        )
        for bounds, expected in range_cases:
            assert list(matches.range(*bounds)) == expected

        matches.remove(first)
        assert len(matches) == 3
        assert len(matches.starting(0)) == 0
        assert len(matches.ending(2)) == 0

        matches.clear()

        assert len(matches) == 0
        for position in (0, 2, 3):
            assert len(matches.starting(position)) == 0
        for position in (2, 3, 4):
            assert len(matches.ending(position)) == 0

    def test_get_slices(self):
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        middle = matches[1:3]

        assert isinstance(middle, Matches)

        assert len(middle) == 2
        assert middle[0] == self.match2
        assert middle[1] == self.match3

    def test_remove_slices(self):
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        del matches[1:3]

        assert len(matches) == 2
        assert matches[0] == self.match1
        assert matches[1] == self.match4

    def test_set_slices(self):
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        matches[1:3] = self.match1, self.match4

        assert len(matches) == 4
        expected = (self.match1, self.match1, self.match4, self.match4)
        for index, fixture in enumerate(expected):
            assert matches[index] == fixture

    def test_set_index(self):
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3):
            matches.append(fixture)

        matches[1] = self.match4

        assert len(matches) == 3
        expected = (self.match1, self.match4, self.match3)
        for index, fixture in enumerate(expected):
            assert matches[index] == fixture

    def test_constructor(self):
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4
        matches = Matches([first, second, third, fourth])

        assert len(matches) == 4
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]

    def test_constructor_kwargs(self):
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4
        matches = Matches([first, second, third, fourth], input_string="test")

        assert len(matches) == 4
        assert matches.input_string == "test"
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]

    def test_crop(self):
        input_string = "abcdefghijklmnopqrstuvwxyz"

        match1 = Match(1, 10, input_string=input_string)
        match2 = Match(0, 2, input_string=input_string)
        match3 = Match(8, 15, input_string=input_string)

        # Croppers may be given as Match objects or as plain spans.
        ret = match1.crop([match2, match3.span])

        assert len(ret) == 1

        assert ret[0].span == (2, 8)
        assert ret[0].value == "cdefgh"

        # Each case pairs the crop argument with the spans expected to remain.
        span_cases = (
            ((1, 10), []),
            ((1, 3), [(3, 10)]),
            ((7, 10), [(1, 7)]),
            ((0, 12), []),
            ((4, 6), [(1, 4), (6, 10)]),
            ([(3, 5), (7, 9)], [(1, 3), (5, 7), (9, 10)]),
        )
        for cropper, remaining in span_cases:
            ret = match1.crop(cropper)
            assert len(ret) == len(remaining)
            for index, span in enumerate(remaining):
                assert ret[index].span == span

    def test_split(self):
        input_string = "123 +word1 - word2 + word3 456"
        match = Match(3, len(input_string) - 3, input_string=input_string)
        pieces = match.split(" -+")

        assert len(pieces) == 3
        assert [piece.value for piece in pieces] == ["word1", "word2", "word3"]
|
||||
|
||||
|
||||
class TestMaches(object):
    """
    Tests for Matches filled from patterns: names, filters, raw values, dicts, chains and holes
    """

    def test_names(self):
        input_string = "One Two Three"

        matches = Matches()

        patterns = (
            StringPattern("One", name="1-str", tags=["One", "str"]),
            RePattern("One", name="1-re", tags=["One", "re"]),
            StringPattern("Two", name="2-str", tags=["Two", "str"]),
            RePattern("Two", name="2-re", tags=["Two", "re"]),
            StringPattern("Three", name="3-str", tags=["Three", "str"]),
            RePattern("Three", name="3-re", tags=["Three", "re"]),
        )
        for pattern in patterns:
            matches.extend(pattern.matches(input_string))

        assert set(matches.names) == {"1-str", "1-re", "2-str", "2-re", "3-str", "3-re"}

    def test_filters(self):
        input_string = "One Two Three"

        def tagged_str(m):
            return "str" in m.tags

        def tagged_re(m):
            return "re" in m.tags

        matches = Matches()

        patterns = (
            StringPattern("One", name="1-str", tags=["One", "str"]),
            RePattern("One", name="1-re", tags=["One", "re"]),
            StringPattern("Two", name="2-str", tags=["Two", "str"]),
            RePattern("Two", name="2-re", tags=["Two", "re"]),
            StringPattern("Three", name="3-str", tags=["Three", "str"]),
            RePattern("Three", name="3-re", tags=["Three", "re"]),
        )
        for pattern in patterns:
            matches.extend(pattern.matches(input_string))

        # The positional/keyword form of every call below is kept exactly as it is.
        selection = matches.starting(0)
        assert len(selection) == 2

        selection = matches.starting(0, tagged_str)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.ending(7, predicate=tagged_str)
        assert len(selection) == 1
        assert selection[0].pattern.name == "2-str"

        selection = matches.previous(matches.named("2-str")[0])
        assert len(selection) == 2
        assert selection[0].pattern.name == "1-str"
        assert selection[1].pattern.name == "1-re"

        selection = matches.previous(matches.named("2-str", 0), tagged_str)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.next(matches.named("2-str", 0))
        assert len(selection) == 2
        assert selection[0].pattern.name == "3-str"
        assert selection[1].pattern.name == "3-re"

        selection = matches.next(matches.named("2-str", 0), index=0, predicate=tagged_re)
        assert selection is not None
        assert selection.pattern.name == "3-re"

        selection = matches.next(matches.named("2-str", index=0), tagged_re)
        assert len(selection) == 1
        assert selection[0].pattern.name == "3-re"

        selection = matches.named("2-str", tagged_re)
        assert len(selection) == 0

        selection = matches.named("2-re", tagged_re, 0)
        assert selection is not None
        assert selection.name == "2-re"  # pylint:disable=no-member

        selection = matches.named("2-re", tagged_re)
        assert len(selection) == 1
        assert selection[0].name == "2-re"

        selection = matches.named("2-re", tagged_re, index=1000)
        assert selection is None

    def test_raw(self):
        input_string = "0123456789"

        match = Match(0, 10, input_string=input_string, formatter=lambda s: s*2)

        def check(expected_raw):
            # The formatter doubles the raw string, whatever the raw boundaries are.
            assert match.value == match.raw * 2
            assert match.raw == expected_raw

        check(input_string)

        match.raw_end = 9
        match.raw_start = 1
        check(input_string[1:9])

        match.raw_end = None
        match.raw_start = None
        check(input_string)

    def test_formatter_chain(self):
        input_string = "100"

        def double(s):
            return s*2

        def add_ten(s):
            return s+10

        match = Match(0, 3, input_string=input_string, formatter=formatters(int, double, add_ten))

        assert match.raw == input_string
        assert match.value == 100 * 2 + 10

    def test_to_dict(self):
        input_string = "One Two Two Three"

        matches = Matches()

        patterns = (
            StringPattern("One", name="1", tags=["One", "str"]),
            RePattern("One", name="1", tags=["One", "re"]),
            StringPattern("Two", name="2", tags=["Two", "str"]),
            RePattern("Two", name="2", tags=["Two", "re"]),
            RePattern("Two", name="2", tags=["Two", "reBis"]),
            StringPattern("Three", name="3", tags=["Three", "str"]),
            RePattern("Three", name="3bis", tags=["Three", "re"]),
            RePattern(r"(\w+)", name="words"),
        )
        for pattern in patterns:
            matches.extend(pattern.matches(input_string))

        kvalues = matches.to_dict(first_value=True)
        assert kvalues == {"1": "One",
                           "2": "Two",
                           "3": "Three",
                           "3bis": "Three",
                           "words": "One"}
        assert kvalues.values_list["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(enforce_list=True)
        assert kvalues["words"] == ["One", "Two", "Three"]

        all_words = ("One", "Two", "Two", "Three")

        # Detailed dict, read through direct item access.
        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues["2"]) == 2
        for index in (0, 1):
            assert kvalues["2"][index].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues["words"]) == 4
        for index, word in enumerate(all_words):
            assert kvalues["words"][index].value == word

        # Detailed dict again, multi-valued entries read through values_list.
        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues.values_list["2"]) == 2
        for index in (0, 1):
            assert kvalues.values_list["2"][index].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues.values_list["words"]) == 4
        for index, word in enumerate(all_words):
            assert kvalues.values_list["words"][index].value == word

    def test_chains(self):
        input_string = "wordX 10 20 30 40 wordA, wordB, wordC 70 80 wordX"

        def only_words(match):
            return match.name == "word"

        def only_digits(match):
            return match.name == "digit"

        matches = Matches(input_string=input_string)

        matches.extend(RePattern(r"\d+", name="digit").matches(input_string))
        matches.extend(RePattern("[a-zA-Z]+", name="word").matches(input_string))

        assert len(matches) == 11

        a_start = input_string.find('wordA')

        b_start = input_string.find('wordB')
        b_end = b_start + len('wordB')

        c_end = input_string.find('wordC') + len('wordC')

        chain_before = matches.chain_before(b_start, " ,", predicate=only_words)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(Match(b_start, b_start), " ,", predicate=only_words)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(b_start, " ,", predicate=only_digits)
        assert len(chain_before) == 0

        chain_before = matches.chain_before(a_start, " ,", predicate=only_digits)
        assert len(chain_before) == 4
        assert [match.value for match in chain_before] == ["40", "30", "20", "10"]

        chain_after = matches.chain_after(b_end, " ,", predicate=only_words)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(Match(b_end, b_end), " ,", predicate=only_words)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(b_end, " ,", predicate=only_digits)
        assert len(chain_after) == 0

        chain_after = matches.chain_after(c_end, " ,", predicate=only_digits)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

        chain_after = matches.chain_after(c_end, " ,", end=10000, predicate=only_digits)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

    def test_holes(self):
        # Seven runs of ten identical digits: '1' * 10 up to '7' * 10.
        input_string = ''.join(digit * 10 for digit in '1234567')

        hole1 = Match(0, 10, input_string=input_string)
        hole2 = Match(20, 30, input_string=input_string)
        hole3 = Match(30, 40, input_string=input_string)
        hole4 = Match(60, 70, input_string=input_string)

        matches = Matches([hole1, hole2], input_string=input_string)
        for late_match in (hole3, hole4):
            matches.append(late_match)

        holes = list(matches.holes())
        assert len(holes) == 2
        assert holes[0].span == (10, 20)
        assert holes[0].value == '2'*10
        assert holes[1].span == (40, 60)
        assert holes[1].value == '5' * 10 + '6' * 10

        holes = list(matches.holes(5, 15))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == '2'*5

        holes = list(matches.holes(5, 15, formatter=lambda value: "formatted"))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == "formatted"

        holes = list(matches.holes(5, 15, predicate=lambda hole: False))
        assert len(holes) == 0

    def test_holes_empty(self):
        input_string = "Test hole on empty matches"
        holes = Matches(input_string=input_string).holes()
        assert len(holes) == 1
        assert holes[0].value == input_string

    def test_holes_seps(self):
        input_string = "Test hole - with many separators + included"
        match = StringPattern("many").matches(input_string)

        matches = Matches(match, input_string)

        holes = matches.holes()
        assert len(holes) == 2

        holes = matches.holes(seps="-+")
        assert len(holes) == 4
        assert [hole.value for hole in holes] == ["Test hole ", " with ", " separators ", " included"]
|
858
libs/common/rebulk/test/test_pattern.py
Normal file
858
libs/common/rebulk/test/test_pattern.py
Normal file
|
@ -0,0 +1,858 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition
|
||||
|
||||
import re
|
||||
import pytest
|
||||
|
||||
from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_AVAILABLE
|
||||
from ..match import Match
|
||||
|
||||
class TestStringPattern(object):
    """
    Tests for StringPattern matching
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single(self):
        pattern = StringPattern("Celtic")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        only = found[0]
        assert isinstance(only, Match)
        assert only.pattern == pattern
        assert only.span == (28, 34)
        assert only.value == "Celtic"

    def test_repr(self):
        assert repr(StringPattern("Celtic")) == '<StringPattern:(\'Celtic\',)>'

    def test_ignore_case(self):
        case_sensitive = StringPattern("celtic", ignore_case=False)
        found = list(case_sensitive.matches(self.input_string))
        assert len(found) == 0

        case_insensitive = StringPattern("celtic", ignore_case=True)
        found = list(case_insensitive.matches(self.input_string))
        assert len(found) == 1
        assert found[0].value == "Celtic"

    def test_private_names(self):
        pattern = StringPattern("celtic", name="test", private_names=["test"], ignore_case=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1
        assert found[0].private

    def test_ignore_names(self):
        pattern = StringPattern("celtic", name="test", ignore_names=["test"], ignore_case=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 0

    def test_no_match(self):
        found = list(StringPattern("Python").matches(self.input_string))
        assert not found

    def test_multiple_patterns(self):
        pattern = StringPattern("playing", "annoyed", "Hebrew")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        expectations = (
            ((18, 25), "playing"),
            ((46, 53), "annoyed"),
            ((88, 94), "Hebrew"),
        )
        for match, (span, value) in zip(found, expectations):
            assert isinstance(match, Match)
            assert match.pattern == pattern
            assert match.span == span
            assert match.value == value

    def test_start_end_kwargs(self):
        pattern = StringPattern("Abyssinian", start=20, end=40)
        found = list(pattern.matches(self.input_string))

        assert len(found) == 0

    def test_matches_kwargs(self):
        pattern = StringPattern("Abyssinian", name="test", value="AB")
        found = list(pattern.matches(self.input_string))

        assert len(found) == 1
        only = found[0]
        assert only.name == "test"
        assert only.value == "AB"

    def test_post_processor(self):
        def post_processor(matches, pattern):
            assert len(matches) == 1
            assert isinstance(pattern, StringPattern)

            # Dropping every match lets the test observe that the result is used.
            return []

        pattern = StringPattern("Abyssinian", name="test", value="AB", post_processor=post_processor)
        found = list(pattern.matches(self.input_string))

        assert len(found) == 0
|
||||
|
||||
|
||||
class TestRePattern(object):
|
||||
"""
|
||||
Tests for RePattern matching
|
||||
"""
|
||||
|
||||
input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
|
||||
"which were the Hebrew letter qoph."
|
||||
|
||||
def test_single_compiled(self):
    """
    Match a single, already compiled, regular expression.
    """
    pattern = RePattern(re.compile("Celt.?c"))

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    only = found[0]
    assert isinstance(only, Match)
    assert only.pattern == pattern
    assert only.span == (28, 34)
    assert only.value == "Celtic"
|
||||
|
||||
def test_single_string(self):
    """
    Match a single regular expression given as a string.
    """
    pattern = RePattern("Celt.?c")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    only = found[0]
    assert isinstance(only, Match)
    assert only.pattern == pattern
    assert only.span == (28, 34)
    assert only.value == "Celtic"
|
||||
|
||||
def test_single_kwargs(self):
    """
    Match a single regular expression given as a dict of keyword arguments.
    """
    definition = {"pattern": "celt.?c", "flags": re.IGNORECASE}
    pattern = RePattern(definition)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    only = found[0]
    assert isinstance(only, Match)
    assert only.pattern == pattern
    assert only.span == (28, 34)
    assert only.value == "Celtic"
|
||||
|
||||
def test_single_vargs(self):
    """
    Match a single regular expression given as a tuple of positional arguments.
    """
    definition = ("celt.?c", re.IGNORECASE)
    pattern = RePattern(definition)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    only = found[0]
    assert isinstance(only, Match)
    assert only.pattern == pattern
    assert only.span == (28, 34)
    assert only.value == "Celtic"
|
||||
|
||||
def test_no_match(self):
    """
    A regular expression that is absent from the input yields no match.
    """
    found = list(RePattern("abc.?def").matches(self.input_string))
    assert len(found) == 0
|
||||
|
||||
def test_shortcuts(self):
    """
    Check the "-" abbreviation with a string definition and with a dict definition.
    """
    definitions = (
        "Celtic-violin",
        {"pattern": "celtic-violin", "flags": re.IGNORECASE},
    )
    for definition in definitions:
        # A fresh abbreviations list is built for every pattern.
        pattern = RePattern(definition, abbreviations=[("-", r"[\W_]+")])

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1
|
||||
|
||||
def test_multiple_patterns(self):
    """
    Several regular expressions in one RePattern each produce their own match.
    """
    pattern = RePattern("pla.?ing", "ann.?yed", "Heb.?ew")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 3

    expectations = (
        ((18, 25), "playing"),
        ((46, 53), "annoyed"),
        ((88, 94), "Hebrew"),
    )
    for match, (span, value) in zip(found, expectations):
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == span
        assert match.value == value
|
||||
|
||||
def test_unnamed_groups(self):
    """Unnamed capture groups become unnamed children of the parent match."""
    pattern = RePattern(r"(Celt.?c)\s+(\w+)")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    parent = found[0]

    assert isinstance(parent, Match)
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert len(parent.children) == 2

    # Expected (span, value) for each capture group, in order.
    expected = (
        ((28, 34), "Celtic"),
        ((35, 41), "violin"),
    )
    for child, (span, value) in zip(parent.children, expected):
        assert isinstance(child, Match)
        assert child.pattern == pattern
        assert child.span == span
        assert child.name is None
        assert child.value == value
        assert child.parent == parent
|
||||
|
||||
def test_named_groups(self):
    """Named capture groups become children that carry the group name."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    parent = found[0]

    assert isinstance(parent, Match)
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert len(parent.children) == 2

    # Expected (name, span, value) for each capture group, in order.
    expected = (
        ("param1", (28, 34), "Celtic"),
        ("param2", (35, 41), "violin"),
    )
    for child, (name, span, value) in zip(parent.children, expected):
        assert isinstance(child, Match)
        assert child.pattern == pattern
        assert child.span == span
        assert child.name == name
        assert child.value == value
        assert child.parent == parent
|
||||
|
||||
def test_children(self):
    """With ``children=True`` the capture groups are returned instead of the parent."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 2

    # Expected (name, span, value) for each returned group, in order.
    expected = (
        ("param1", (28, 34), "Celtic"),
        ("param2", (35, 41), "violin"),
    )
    for group, (name, span, value) in zip(found, expected):
        assert isinstance(group, Match)
        assert group.pattern == pattern
        assert group.span == span
        assert group.name == name
        assert group.value == value
|
||||
|
||||
def test_children_parent_private(self):
    """``children=True`` with ``private_parent=True`` returns a private parent plus public children."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True, private_parent=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # Fixed: this used to check ``group1`` (copy-paste slip), leaving the
    # parent's type unverified.
    assert isinstance(parent, Match)
    assert parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert not group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert not group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_parent_children_private(self):
    """``private_children=True`` returns a public parent plus private children."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_children=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # Fixed: this used to check ``group1`` (copy-paste slip), leaving the
    # parent's type unverified.
    assert isinstance(parent, Match)
    assert not parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_every(self):
    """``every=True`` returns the parent and all children, none of them private."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", every=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # Fixed: this used to check ``group1`` (copy-paste slip), leaving the
    # parent's type unverified.
    assert isinstance(parent, Match)
    assert not parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert not group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert not group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_private_names(self):
    """Groups listed in ``private_names`` are returned but flagged private."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_names=["param2"], children=True)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 2

    public, hidden = found[0], found[1]
    assert public.name == "param1"
    assert not public.private
    assert hidden.name == "param2"
    assert hidden.private
|
||||
|
||||
def test_ignore_names(self):
    """Groups listed in ``ignore_names`` are left out of the result."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", ignore_names=["param2"], children=True)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1
    assert found[0].name == "param1"
|
||||
|
||||
def test_matches_kwargs(self):
    """``name``/``value`` keyword arguments are applied to the match and its children."""
    # No capture group: only the top-level match is checked.
    pattern = RePattern("He.rew", name="test", value="HE")
    found = list(pattern.matches(self.input_string))

    assert len(found) == 1
    assert found[0].name == "test"
    assert found[0].value == "HE"

    # Unnamed groups: children take the pattern name and value.
    pattern = RePattern("H(e.)(rew)", name="test", value="HE")
    found = list(pattern.matches(self.input_string))

    assert len(found) == 1
    assert found[0].name == "test"
    assert found[0].value == "HE"

    children = found[0].children
    assert len(children) == 2
    assert children[0].name == "test"
    assert children[0].value == "HE"

    assert children[1].name == "test"
    assert children[1].value == "HE"

    # Named groups: children keep their group name but still get the value.
    pattern = RePattern("H(?P<first>e.)(?P<second>rew)", name="test", value="HE")
    found = list(pattern.matches(self.input_string))

    assert len(found) == 1
    assert found[0].name == "test"
    assert found[0].value == "HE"

    children = found[0].children
    assert len(children) == 2
    assert children[0].name == "first"
    assert children[0].value == "HE"

    assert children[1].name == "second"
    assert children[1].value == "HE"
|
||||
|
||||
|
||||
class TestFunctionalPattern(object):
    """
    Tests for FunctionalPattern matching
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single_vargs(self):
        """A function returning a ``(start, end, value, name)`` tuple yields one match."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                return start, start + len("fly"), "fly", "functional"

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (14, 17)
        assert match.name == "functional"
        assert match.value == "fly"

    def test_single_kwargs(self):
        """A function returning a dict of match arguments yields one match."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                return {"start": start, "end": start + len("fly"), "name": "functional"}

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (14, 17)
        assert match.name == "functional"
        assert match.value == "fly"

    def test_multiple_objects(self):
        """A function may return a list mixing tuple and dict match descriptions."""
        def func(input_string):
            results = []

            start = input_string.find("fly")
            if start > -1:
                results.append((start, start + len("fly"), {'name': "functional"}))

            start = input_string.find("annoyed")
            if start > -1:
                results.append((start, start + len("annoyed")))

            start = input_string.find("Hebrew")
            if start > -1:
                results.append({"start": start, "end": start + len("Hebrew")})

            return results

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        assert isinstance(found[0], Match)
        assert found[0].pattern == pattern
        assert found[0].span == (14, 17)
        assert found[0].name == "functional"
        assert found[0].value == "fly"

        assert isinstance(found[1], Match)
        assert found[1].pattern == pattern
        assert found[1].span == (46, 53)
        assert found[1].value == "annoyed"

        assert isinstance(found[2], Match)
        assert found[2].pattern == pattern
        assert found[2].span == (88, 94)
        assert found[2].value == "Hebrew"

    def test_multiple_generator(self):
        """A generator function may yield several match descriptions."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                yield (start, start + len("fly"), {'name': "functional"})

            start = input_string.find("annoyed")
            if start > -1:
                yield (start, start + len("annoyed"))

            start = input_string.find("Hebrew")
            if start > -1:
                # Positional start combined with keyword end.
                yield (start, {"end": start + len("Hebrew")})

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        assert isinstance(found[0], Match)
        assert found[0].pattern == pattern
        assert found[0].span == (14, 17)
        assert found[0].name == "functional"
        assert found[0].value == "fly"

        assert isinstance(found[1], Match)
        assert found[1].pattern == pattern
        assert found[1].span == (46, 53)
        assert found[1].value == "annoyed"

        assert isinstance(found[2], Match)
        assert found[2].pattern == pattern
        assert found[2].span == (88, 94)
        assert found[2].value == "Hebrew"

    def test_no_match(self):
        """A function returning ``None`` produces no matches."""
        pattern = FunctionalPattern(lambda x: None)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 0

    def test_multiple_patterns(self):
        """Several functions given to one FunctionalPattern each contribute a match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start > -1:
                return start, start + len("playing")

        def annoyed(input_string):
            start = input_string.find("annoyed")
            if start > -1:
                return start, start + len("annoyed")

        def hebrew(input_string):
            start = input_string.find("Hebrew")
            if start > -1:
                return start, start + len("Hebrew")

        pattern = FunctionalPattern(playing, annoyed, hebrew)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        # Expected (span, value) for each match, in result order.
        expected = (
            ((18, 25), "playing"),
            ((46, 53), "annoyed"),
            ((88, 94), "Hebrew"),
        )
        for match, (span, value) in zip(found, expected):
            assert isinstance(match, Match)
            assert match.pattern == pattern
            assert match.span == span
            assert match.value == value

    def test_matches_kwargs(self):
        """``name``/``value`` keyword arguments are applied to the produced match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start > -1:
                return start, start + len("playing")

        pattern = FunctionalPattern(playing, name="test", value="PLAY")
        found = list(pattern.matches(self.input_string))

        assert len(found) == 1
        assert found[0].name == "test"
        assert found[0].value == "PLAY"
|
||||
|
||||
|
||||
class TestValue(object):
    """
    Tests for value option
    """

    input_string = "This string contains 1849 a number"

    def test_str_value(self):
        """A string ``value`` replaces the matched text as the match value."""
        pattern = StringPattern("1849", name="dummy", value="test")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == "test"

    def test_dict_child_value(self):
        """A dict ``value`` keyed by group name overrides only that child's value."""
        pattern = RePattern(
            r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
            formatter={'intParam': lambda x: int(x) * 2,
                       'strParam': lambda x: "really " + x},
            format_all=True,
            value={'intParam': 'INT_PARAM_VALUE'},
        )

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        parent = found[0]
        assert len(parent.children) == 2

        str_group, int_group = parent.children

        # strParam has no fixed value, so its formatter output is used.
        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "really contains"

        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == 'INT_PARAM_VALUE'

    def test_dict_default_value(self):
        """``__parent__`` and ``__children__`` keys supply default values in a dict ``value``."""
        pattern = RePattern(
            r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
            formatter={'intParam': lambda x: int(x) * 2,
                       'strParam': lambda x: "really " + x},
            format_all=True,
            value={'__children__': 'CHILD', 'strParam': 'STR_VALUE', '__parent__': 'PARENT'},
        )

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        parent = found[0]
        assert parent.value == "PARENT"
        assert len(parent.children) == 2

        str_group, int_group = parent.children

        # The explicit per-name entry wins over the ``__children__`` default.
        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "STR_VALUE"

        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == "CHILD"
|
||||
|
||||
|
||||
class TestFormatter(object):
    """
    Tests for formatter option
    """

    input_string = "This string contains 1849 a number"

    def test_single_string(self):
        """A formatter callable transforms the value of a StringPattern match."""
        pattern = StringPattern("1849", name="dummy", formatter=lambda x: int(x) / 2)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 / 2

    def test_single_re_no_group(self):
        """A formatter callable transforms the value of a group-less RePattern match."""
        pattern = RePattern(r"\d+", formatter=lambda x: int(x) * 2)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 * 2

    def test_single_re_named_groups(self):
        """A dict formatter keyed by group name formats each child separately."""
        pattern = RePattern(
            r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
            formatter={'intParam': lambda x: int(x) * 2,
                       'strParam': lambda x: "really " + x},
            format_all=True,
        )

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        parent = found[0]
        assert len(parent.children) == 2

        str_group, int_group = parent.children

        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "really contains"

        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == 1849 * 2

    def test_repeated_captures_option(self):
        """Children of a repeated group depend on ``REGEX_AVAILABLE`` and ``repeated_captures``."""
        pattern = RePattern(r"\[(\d+)\](?:-(\d+))*")

        found = list(pattern.matches("[02]-03-04-05-06"))
        assert len(found) == 1

        match = found[0]
        if REGEX_AVAILABLE:
            assert len(match.children) == 5
            assert [child.value for child in match.children] == ["02", "03", "04", "05", "06"]
        else:
            assert len(match.children) == 2
            assert [child.value for child in match.children] == ["02", "06"]

            # NOTE(review): nesting reconstructed from the stripped source. The
            # branch above shows repeated captures working when REGEX_AVAILABLE
            # is true, so the error is only expected here - confirm upstream.
            with pytest.raises(NotImplementedError):
                RePattern(r"\[(\d+)\](?:-(\d+))*", repeated_captures=True)

        # Explicitly disabling repeated captures keeps first and last capture only.
        pattern = RePattern(r"\[(\d+)\](?:-(\d+))*", repeated_captures=False)

        found = list(pattern.matches("[02]-03-04-05-06"))
        assert len(found) == 1

        match = found[0]
        assert len(match.children) == 2
        assert [child.value for child in match.children] == ["02", "06"]

    def test_single_functional(self):
        """A formatter callable transforms the value of a FunctionalPattern match."""
        def digit(input_string):
            start = input_string.find("1849")
            if start > -1:
                return start, start + len("1849")

        pattern = FunctionalPattern(digit, formatter=lambda x: int(x) * 3)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 * 3
|
||||
|
||||
|
||||
class TestValidator(object):
    """
    Tests for validator option
    """

    input_string = "This string contains 1849 a number"

    @staticmethod
    def true_validator(match):
        """Accept matches whose integer value is below 1850."""
        return int(match.value) < 1850

    @staticmethod
    def false_validator(match):
        """Accept matches whose integer value is at least 1850."""
        return int(match.value) >= 1850

    def test_single_string(self):
        """A StringPattern match is kept only when its validator accepts it."""
        rejected = StringPattern("1849", name="dummy", validator=self.false_validator)

        found = list(rejected.matches(self.input_string))
        assert len(found) == 0

        accepted = StringPattern("1849", validator=self.true_validator)

        found = list(accepted.matches(self.input_string))
        assert len(found) == 1

    def test_single_re_no_group(self):
        """A group-less RePattern match is kept only when its validator accepts it."""
        rejected = RePattern(r"\d+", validator=self.false_validator)

        found = list(rejected.matches(self.input_string))
        assert len(found) == 0

        accepted = RePattern(r"\d+", validator=self.true_validator)

        found = list(accepted.matches(self.input_string))
        assert len(found) == 1

    def test_single_re_named_groups(self):
        """A dict validator keyed by group name decides whether the match is kept."""
        rejected = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                             validator={'intParam': self.false_validator}, validate_all=True)

        found = list(rejected.matches(self.input_string))
        assert len(found) == 0

        accepted = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                             validator={'intParam': self.true_validator}, validate_all=True)

        found = list(accepted.matches(self.input_string))
        assert len(found) == 1

    def test_validate_all(self):
        """Check validator behaviour with and without ``validate_all``."""
        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            validator=lambda match: match.value < 100,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 0

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            validator=lambda match: match.value > 100,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        def invalid_func(match):
            if match.name == 'intParam':
                return True
            return match.value.startswith('abc')

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            validator=invalid_func, validate_all=True,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 0

        def func(match):
            if match.name == 'intParam':
                return True
            return match.value.startswith('contains')

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            validator=func, validate_all=True,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

    def test_format_all(self):
        """``formatter=int`` works on the child group, but ``format_all=True`` raises ValueError."""
        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1
        for match in found:
            assert match.value is not None

        # NOTE(review): nesting reconstructed from the stripped source; the
        # trailing loop is assumed to sit inside the ``with`` block - confirm.
        with pytest.raises(ValueError):
            pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, format_all=True)
            found = list(pattern.matches(self.input_string))
            for match in found:
                assert match.value is not None

    def test_single_functional(self):
        """A FunctionalPattern match is kept only when its validator accepts it."""
        def digit(input_string):
            start = input_string.find("1849")
            if start > -1:
                return start, start + len("1849")

        rejected = FunctionalPattern(digit, validator=self.false_validator)

        found = list(rejected.matches(self.input_string))
        assert len(found) == 0

        accepted = FunctionalPattern(digit, validator=self.true_validator)

        found = list(accepted.matches(self.input_string))
        assert len(found) == 1
|
215
libs/common/rebulk/test/test_processors.py
Normal file
215
libs/common/rebulk/test/test_processors.py
Normal file
|
@ -0,0 +1,215 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
|
||||
from ..pattern import StringPattern, RePattern
|
||||
from ..processors import ConflictSolver
|
||||
from ..rules import execute_rule
|
||||
from ..match import Matches
|
||||
|
||||
|
||||
def test_conflict_1():
    """Matches contained in a longer match are removed by ConflictSolver."""
    text = "abcdefghijklmnopqrstuvwxyz"

    pattern = StringPattern("ijklmn", "kl", "abcdef", "ab", "ef", "yz")
    found = Matches(pattern.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [match.value for match in found] == ["ijklmn", "abcdef", "yz"]
|
||||
|
||||
|
||||
def test_conflict_2():
    """Of two overlapping matches, only the longer one is kept."""
    text = "abcdefghijklmnopqrstuvwxyz"

    pattern = StringPattern("ijklmn", "jklmnopqrst")
    found = Matches(pattern.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [match.value for match in found] == ["jklmnopqrst"]
|
||||
|
||||
|
||||
def test_conflict_3():
    """A match that is a suffix of a longer match is dropped."""
    text = "abcdefghijklmnopqrstuvwxyz"

    pattern = StringPattern("ijklmnopqrst", "jklmnopqrst")
    found = Matches(pattern.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [match.value for match in found] == ["ijklmnopqrst"]
|
||||
|
||||
|
||||
def test_conflict_4():
    """Adjacent matches (short then long) do not conflict and are both kept."""
    text = "123456789"

    pattern = StringPattern("123", "456789")
    found = Matches(pattern.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [match.value for match in found] == ["123", "456789"]
|
||||
|
||||
|
||||
def test_conflict_5():
    """Adjacent matches (long then short) do not conflict and are both kept."""
    text = "123456789"

    pattern = StringPattern("123456", "789")
    found = Matches(pattern.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [match.value for match in found] == ["123456", "789"]
|
||||
|
||||
|
||||
def test_prefer_longer_parent():
    """Only the two integer children of the 'prefer' pattern remain after conflict solving."""
    text = "xxx.1x02.xxx"

    preferred = RePattern("([0-9]+)x([0-9]+)", name='prefer', children=True, formatter=int)
    skipped = RePattern("x([0-9]+)", name='skip', children=True)

    found = Matches(preferred.matches(text))
    found.extend(skipped.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 2
    assert found[0].value == 1
    assert found[1].value == 2
|
||||
|
||||
|
||||
def test_conflict_solver_1():
    """A solver returning ``'__default__'`` on the longer match leaves the longer match."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "2345678"
|
||||
|
||||
|
||||
def test_conflict_solver_2():
    """``'__default__'`` on the longer plus ``conflicting`` on the shorter leaves the shorter match."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_3():
    """A solver on the longer match that returns ``match`` leaves the shorter match."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: match)
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_4():
    """A solver on the shorter match that returns ``conflicting`` leaves the shorter match."""
    text = "123456789"

    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_5():
    """A solver on the longer match that returns ``conflicting`` leaves the longer match."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: conflicting)
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "2345678"
|
||||
|
||||
|
||||
def test_conflict_solver_6():
    """Same scenario as ``test_conflict_solver_4``: the shorter match is the one left."""
    text = "123456789"

    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_7():
    """The longer match wins even when the shorter one is added to Matches first."""
    text = "102"

    longer = StringPattern("102")
    shorter = StringPattern("02")

    # Shorter match is inserted first on purpose.
    found = Matches(shorter.matches(text))
    found.extend(longer.matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "102"
|
||||
|
||||
|
||||
def test_unresolved():
    """Both matches are kept in three scenarios where the conflict is not resolved."""
    text = "123456789"

    # Equal-length, partially overlapping matches.
    first = StringPattern("23456")
    second = StringPattern("34567")

    found = Matches(first.matches(text))
    found.extend(second.matches(text))

    execute_rule(ConflictSolver(), found, None)
    assert len(found) == 2

    # Solver on the longer match returns None.
    first = StringPattern("34567")
    second = StringPattern("2345678", conflict_solver=lambda match, conflicting: None)

    found = Matches(first.matches(text))
    found.extend(second.matches(text))

    execute_rule(ConflictSolver(), found, None)
    assert len(found) == 2

    # Solver on the shorter match returns None.
    first = StringPattern("34567", conflict_solver=lambda match, conflicting: None)
    second = StringPattern("2345678")

    found = Matches(first.matches(text))
    found.extend(second.matches(text))

    execute_rule(ConflictSolver(), found, None)
    assert len(found) == 2
|
419
libs/common/rebulk/test/test_rebulk.py
Normal file
419
libs/common/rebulk/test/test_rebulk.py
Normal file
|
@ -0,0 +1,419 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
|
||||
from ..rebulk import Rebulk
|
||||
from ..rules import Rule
|
||||
from . import rebulk_rules_module as rm
|
||||
|
||||
|
||||
def test_rebulk_simple():
    """String, regex and functional patterns registered on one Rebulk all match."""
    def func(input_string):
        start = input_string.find("over")
        if start > -1:
            return start, start + len("over")

    rebulk = Rebulk()
    rebulk.string("quick")
    rebulk.regex("f.x")
    rebulk.functional(func)

    text = "The quick brown fox jumps over the lazy dog"

    found = rebulk.matches(text)
    assert len(found) == 3

    assert found[0].value == "quick"
    assert found[1].value == "fox"
    assert found[2].value == "over"
|
||||
|
||||
|
||||
def test_rebulk_composition():
    """Nested Rebulk objects contribute matches; the nested one built with ``disabled`` adds none."""
    rebulk = Rebulk()

    rebulk.string("quick")
    rebulk.rebulk(Rebulk().regex("f.x"))

    # This nested Rebulk is created with a ``disabled`` callable returning True.
    rebulk.rebulk(Rebulk(disabled=lambda context: True).functional(lambda string: None))

    text = "The quick brown fox jumps over the lazy dog"

    found = rebulk.matches(text)
    assert len(found) == 2

    assert found[0].value == "quick"
    assert found[1].value == "fox"
|
||||
|
||||
|
||||
def test_rebulk_context():
    """The context dict disables the string pattern and feeds the functional pattern."""
    context = {'nostring': True, 'word': 'lazy'}

    def func(input_string, context):
        word = context.get('word', 'over')
        start = input_string.find(word)
        if start > -1:
            return start, start + len(word)

    rebulk = Rebulk()
    rebulk.string("quick", disabled=lambda context: context.get('nostring', False))
    rebulk.regex("f.x", disabled=lambda context: context.get('noregex', False))
    rebulk.functional(func)

    text = "The quick brown fox jumps over the lazy dog"

    found = rebulk.matches(text, context)
    assert len(found) == 2

    # "quick" is absent because the context sets 'nostring'.
    assert found[0].value == "fox"
    assert found[1].value == "lazy"
|
||||
|
||||
|
||||
def test_rebulk_prefer_longer():
    """With default rules, "brown" is reported and the overlapping "own" is not."""
    builder = Rebulk().string("quick").string("own").regex("br.{2}n")
    matches = builder.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2

    for index, expected in enumerate(("quick", "brown")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_defaults():
    """Check how per-kind defaults and global defaults combine into match names and tags."""
    text = "The quick brown fox jumps over the lazy dog"

    def func(input_string):
        start = input_string.find("fox")
        if start == -1:
            return None
        return start, start + len("fox")

    # Per-kind defaults only.
    matches = (Rebulk()
               .string_defaults(name="string", tags=["a", "b"])
               .regex_defaults(name="regex")
               .functional_defaults(name="functional")
               .string("quick", tags=["c"])
               .functional(func)
               .regex("br.{2}n")
               .matches(text))
    assert matches[0].name == "string"
    assert matches[0].tags == ["a", "b", "c"]
    assert matches[1].name == "functional"
    assert matches[2].name == "regex"

    # Global defaults combined with per-kind defaults.
    matches = (Rebulk()
               .defaults(name="default", tags=["0"])
               .string_defaults(name="string", tags=["a", "b"])
               .functional_defaults(name="functional", tags=["1"])
               .string("quick", tags=["c"])
               .functional(func)
               .regex("br.{2}n")
               .matches(text))
    assert matches[0].name == "string"
    assert matches[0].tags == ["0", "a", "b", "c"]
    assert matches[1].name == "functional"
    assert matches[1].tags == ["0", "1"]
    assert matches[2].name == "default"
    assert matches[2].tags == ["0"]
|
||||
|
||||
|
||||
def test_rebulk_rebulk():
    """Attach a child Rebulk to a base Rebulk and match through the base."""
    base = Rebulk().string("quick")
    child = Rebulk().string("own").regex("br.{2}n")

    combined = base.rebulk(child)
    matches = combined.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2

    for index, expected in enumerate(("quick", "brown")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_no_default():
    """With default_rules=False, the overlapping "own" and "brown" matches are both kept."""
    builder = Rebulk(default_rules=False).string("quick").string("own").regex("br.{2}n")
    matches = builder.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 3

    for index, expected in enumerate(("quick", "own", "brown")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_empty_match():
    """A children-only regex whose capture group is empty contributes no match."""
    builder = (Rebulk(default_rules=False)
               .string("quick")
               .string("own")
               .regex("br(.*?)own", children=True))
    matches = builder.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2

    for index, expected in enumerate(("quick", "own")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_tags_names():
    """Look matches up by name and by tag, including tags returned by functional patterns."""
    rebulk = Rebulk()

    rebulk.string("quick", name="str", tags=["first", "other"])
    rebulk.regex("f.x", tags="other")

    def func(input_string):
        # Tuple form: (start, end, extra keyword arguments).
        start = input_string.find("over")
        if start == -1:
            return None
        return start, start + len("over"), {'tags': ['custom']}

    rebulk.functional(func, name="fn")

    def func2(input_string):
        # Dict form: start/end and extra values in a single mapping.
        start = input_string.find("lazy")
        if start == -1:
            return None
        return {'start': start, 'end': start + len("lazy"), 'tags': ['custom']}

    rebulk.functional(func2, name="fn")

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")
    assert len(matches) == 4

    for name, count in (("str", 1), ("fn", 2), ("false", 0)):
        assert len(matches.named(name)) == count

    for tag, count in (("false", 0), ("first", 1), ("other", 2), ("custom", 2)):
        assert len(matches.tagged(tag)) == count
|
||||
|
||||
|
||||
def test_rebulk_rules_1():
    """Apply rm.RemoveAllButLastYear and expect only the last year to remain."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.rules(rm.RemoveAllButLastYear)

    matches = rebulk.matches("1984 keep only last 1968 entry 1982 case")

    assert len(matches) == 1
    last_year = matches[0]
    assert last_year.value == "1982"
|
||||
|
||||
|
||||
def test_rebulk_rules_2():
    """Apply rm.PrefixedSuffixedYear with private prefix/suffix patterns."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYear)

    matches = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    assert len(matches) == 2
    for index, expected in enumerate(("1984", "1968")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_rules_3():
    """Same scenario as the PrefixedSuffixedYear test, using rm.PrefixedSuffixedYearNoLambda."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYearNoLambda)

    matches = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    assert len(matches) == 2
    for index, expected in enumerate(("1984", "1968")):
        assert matches[index].value == expected
|
||||
|
||||
|
||||
def test_rebulk_rules_4():
    """Use a locally defined rule that removes the "grabbed" match when another match precedes it."""

    class FirstOnlyRule(Rule):
        def when(self, matches, context):
            grabbed = matches.named("grabbed", 0)
            if not grabbed:
                return None
            if not matches.previous(grabbed):
                return None
            return grabbed

        def then(self, matches, when_response, context):
            matches.remove(when_response)

    rebulk = Rebulk()

    rebulk.regex("This match (.*?)grabbed", name="grabbed")
    rebulk.regex("if it's (.*?)first match", private=True)

    rebulk.rules(FirstOnlyRule)

    matches = rebulk.matches("This match is grabbed only if it's the first match")
    assert len(matches) == 1
    assert matches[0].value == "This match is grabbed"

    matches = rebulk.matches("if it's NOT the first match, This match is NOT grabbed")
    assert len(matches) == 0
|
||||
|
||||
|
||||
class TestMarkers(object):
    """Tests for marker patterns and the rules that consult them."""

    def test_one_marker(self):
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_match(word_match, lambda marker: marker.name == "mark1", 0)
                if marker:
                    return None
                return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk()
                  .regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex(r'\[.*?\]', marker=True, name="mark2")
                  .string("word", name="word")
                  .rules(MarkerRule))

        matches = rebulk.matches("grab (word) only if it's in parenthesis")

        assert len(matches) == 1
        assert matches[0].value == "word"

        matches = rebulk.matches("don't grab [word] if it's in braket")
        assert len(matches) == 0

        matches = rebulk.matches("don't grab word at all")
        assert len(matches) == 0

    def test_multiple_marker(self):
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_match(
                    word_match,
                    lambda marker: marker.name == "mark1" or marker.name == "mark2")
                if len(marker) >= 2:
                    return None
                return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk()
                  .regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex(r'\[.*?\]', marker=True, name="mark2")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        matches = rebulk.matches("[grab (word) only] if it's in parenthesis and brakets")

        assert len(matches) == 1
        assert matches[0].value == "word"

        matches = rebulk.matches("[don't grab](word)[if brakets are outside]")
        assert len(matches) == 0

        matches = rebulk.matches("(grab w[or)d even] if it's partially in parenthesis and brakets")
        assert len(matches) == 1
        assert matches[0].value == "w[or)d"

    def test_at_index_marker(self):
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_index(
                    word_match.start,
                    lambda marker: marker.name == "mark1", 0)
                if marker:
                    return None
                return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk()
                  .regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        matches = rebulk.matches("gr(ab wo)rd only if starting of match is inside parenthesis")

        assert len(matches) == 1
        assert matches[0].value == "wo)rd"

        matches = rebulk.matches("don't grab wo(rd if starting of match is not inside parenthesis")

        assert len(matches) == 0

    def test_remove_marker(self):
        class MarkerRule(Rule):
            def when(self, matches, context):
                marker = matches.markers.named("mark1", 0)
                if not marker:
                    return None
                return marker

            def then(self, matches, when_response, context):
                matches.markers.remove(when_response)

        rebulk = (Rebulk()
                  .regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        matches = rebulk.matches("grab word event (if it's not) inside parenthesis")

        assert len(matches) == 1
        assert matches[0].value == "word"

        assert not matches.markers
|
||||
|
||||
|
||||
class TestUnicode(object):
    """String, regex and functional patterns applied to a non-ASCII input string."""

    def test_rebulk_simple(self):
        text = u"敏捷的棕色狐狸跳過懶狗"

        rebulk = Rebulk()

        rebulk.string(u"敏")
        rebulk.regex(u"捷")

        def func(input_string):
            start = input_string.find(u"的")
            if start == -1:
                return None
            return start, start + len(u"的")

        rebulk.functional(func)

        matches = rebulk.matches(text)
        assert len(matches) == 3

        for index, expected in enumerate((u"敏", u"捷", u"的")):
            assert matches[index].value == expected
|
||||
|
||||
|
||||
class TestImmutable(object):
    """Removing items from a query result must leave the queried Matches object untouched."""

    def test_starting(self):
        text = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().string("quick").string("over").string("fox").matches(text)

        for position in range(len(text)):
            starting = matches.starting(position)
            # Iterate over a snapshot because the result list is emptied as we go.
            for match in list(starting):
                starting.remove(match)

        assert len(matches) == 3

    def test_ending(self):
        text = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().string("quick").string("over").string("fox").matches(text)

        for position in range(len(text)):
            ending = matches.ending(position)
            # Iterate over a snapshot because the result list is emptied as we go.
            for match in list(ending):
                ending.remove(match)

        assert len(matches) == 3

    def test_named(self):
        text = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().defaults(name='test').string("quick").string("over").string("fox").matches(text)

        named = matches.named('test')
        for match in list(named):
            named.remove(match)

        assert len(named) == 0
        assert len(matches) == 3
|
197
libs/common/rebulk/test/test_rules.py
Normal file
197
libs/common/rebulk/test/test_rules.py
Normal file
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, no-member, len-as-condition
|
||||
import pytest
|
||||
from rebulk.test.default_rules_module import RuleRemove0, RuleAppend0, RuleRename0, RuleAppend1, RuleRemove1, \
|
||||
RuleRename1, RuleAppend2, RuleRename2, RuleAppend3, RuleRename3, RuleAppendTags0, RuleRemoveTags0, \
|
||||
RuleAppendTags1, RuleRemoveTags1
|
||||
|
||||
from ..rules import Rules
|
||||
from ..match import Matches, Match
|
||||
|
||||
from .rules_module import Rule1, Rule2, Rule3, Rule0, Rule1Disabled
|
||||
from . import rules_module as rm
|
||||
|
||||
|
||||
def test_rule_priority():
    """Run two rule combinations, given as classes and as instances, and check the outcome."""
    matches = Matches([Match(1, 2)])
    Rules(Rule1, Rule2()).execute_all_rules(matches, {})
    assert len(matches) == 0

    matches = Matches([Match(1, 2)])
    Rules(Rule1(), Rule0).execute_all_rules(matches, {})
    assert len(matches) == 1
    assert matches[0] == Match(3, 4)
|
||||
|
||||
|
||||
def test_rules_duplicates():
    """Executing a rule set that holds the same rule twice raises ValueError."""
    matches = Matches([Match(1, 2)])
    duplicated = Rules(Rule1, Rule1)

    with pytest.raises(ValueError):
        duplicated.execute_all_rules(matches, {})
|
||||
|
||||
|
||||
def test_rule_disabled():
    """Run Rule1Disabled together with Rule2 and check both matches are present."""
    matches = Matches([Match(1, 2)])

    Rules(Rule1Disabled(), Rule2()).execute_all_rules(matches, {})

    assert len(matches) == 2
    for index, expected in enumerate((Match(1, 2), Match(3, 4))):
        assert matches[index] == expected
|
||||
|
||||
|
||||
def test_rule_when():
    """Run Rule3 with the context key 'when' set to False, then to True."""
    rules = Rules(Rule3())

    matches = Matches([Match(1, 2)])
    rules.execute_all_rules(matches, {'when': False})
    assert len(matches) == 1
    assert matches[0] == Match(1, 2)

    matches = Matches([Match(1, 2)])
    rules.execute_all_rules(matches, {'when': True})
    assert len(matches) == 2
    for index, expected in enumerate((Match(1, 2), Match(3, 4))):
        assert matches[index] == expected
|
||||
|
||||
|
||||
class TestDefaultRules(object):
    """Tests for the rule classes imported from default_rules_module."""

    def test_remove(self):
        for rule in (RuleRemove0, RuleRemove1):
            rules = Rules(rule)

            matches = Matches([Match(1, 2)])
            rules.execute_all_rules(matches, {})

            assert len(matches) == 0

    def test_append(self):
        # The second item is the expected number of 'renamed' matches, or None when unchecked.
        cases = (
            (RuleAppend0, None),
            (RuleAppend1, None),
            (RuleAppend2, 1),
            (RuleAppend3, 1),
        )
        for rule, renamed_count in cases:
            rules = Rules(rule)

            matches = Matches([Match(1, 2)])
            rules.execute_all_rules(matches, {})

            assert len(matches) == 2
            if renamed_count is not None:
                assert len(matches.named('renamed')) == renamed_count

    def test_rename(self):
        # (rule, match start, match end, expected 'original' count, expected 'renamed' count)
        cases = (
            (RuleRename0, 1, 2, 1, 0),
            (RuleRename1, 5, 10, 0, 1),
            (RuleRename2, 5, 10, 0, 1),
            (RuleRename3, 5, 10, 0, 1),
        )
        for rule, start, end, original_count, renamed_count in cases:
            rules = Rules(rule)

            matches = Matches([Match(start, end, name='original')])
            rules.execute_all_rules(matches, {})

            assert len(matches.named('original')) == original_count
            assert len(matches.named('renamed')) == renamed_count

    def test_append_tags(self):
        for rule in (RuleAppendTags0, RuleAppendTags1):
            rules = Rules(rule)

            matches = Matches([Match(1, 2, name='tags', tags=['other'])])
            rules.execute_all_rules(matches, {})

            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other', 'new-tag']

    def test_remove_tags(self):
        for rule in (RuleRemoveTags0, RuleRemoveTags1):
            rules = Rules(rule)

            matches = Matches([Match(1, 2, name='tags', tags=['other', 'new-tag'])])
            rules.execute_all_rules(matches, {})

            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other']
|
||||
|
||||
|
||||
def test_rule_module():
    """Build a Rules object from the whole rules_module module and execute it."""
    module_rules = Rules(rm)

    matches = Matches([Match(1, 2)])
    module_rules.execute_all_rules(matches, {})

    assert len(matches) == 1
|
||||
|
||||
|
||||
def test_rule_repr():
    """Check the string form of rule instances, including a disabled one."""
    expectations = (
        (Rule0(), "<Rule0>"),
        (Rule1(), "<Rule1>"),
        (Rule2(), "<Rule2>"),
        (Rule1Disabled(), "<Disabled Rule1>"),
    )
    for rule, text in expectations:
        assert str(rule) == text
|
111
libs/common/rebulk/test/test_toposort.py
Normal file
111
libs/common/rebulk/test/test_toposort.py
Normal file
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014 True Blade Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Original:
|
||||
# - https://bitbucket.org/ericvsmith/toposort (1.4)
|
||||
# Modifications:
|
||||
# - port to pytest
|
||||
# pylint: skip-file
|
||||
|
||||
import pytest
|
||||
from ..toposort import toposort, toposort_flatten, CyclicDependency
|
||||
|
||||
|
||||
class TestCase(object):
    """Tests for toposort()."""

    def test_simple(self):
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]

        graph = {2: set([11]), 9: set([11, 8]), 10: set([11, 3]), 11: set([7, 5]), 8: set([7, 3])}
        assert list(toposort(graph)) == expected

        # make sure self dependencies are ignored
        graph = {2: set([2, 11]), 9: set([11, 8]), 10: set([10, 11, 3]), 11: set([7, 5]), 8: set([7, 3])}
        assert list(toposort(graph)) == expected

        assert list(toposort({1: set()})) == [set([1])]
        assert list(toposort({1: set([1])})) == [set([1])]

    def test_no_dependencies(self):
        assert list(toposort({1: set([2]), 3: set([4]), 5: set([6])})) == [set([2, 4, 6]), set([1, 3, 5])]
        assert list(toposort({1: set(), 3: set(), 5: set()})) == [set([1, 3, 5])]

    def test_empty(self):
        assert list(toposort({})) == []

    def test_strings(self):
        graph = {
            '2': set(['11']),
            '9': set(['11', '8']),
            '10': set(['11', '3']),
            '11': set(['7', '5']),
            '8': set(['7', '3']),
        }
        expected = [set(['3', '5', '7']), set(['8', '11']), set(['2', '9', '10'])]
        assert list(toposort(graph)) == expected

    def test_objects(self):
        o2 = object()
        o3 = object()
        o5 = object()
        o7 = object()
        o8 = object()
        o9 = object()
        o10 = object()
        o11 = object()
        graph = {
            o2: set([o11]),
            o9: set([o11, o8]),
            o10: set([o11, o3]),
            o11: set([o7, o5]),
            o8: set([o7, o3, o8]),
        }
        expected = [set([o3, o5, o7]), set([o8, o11]), set([o2, o9, o10])]
        assert list(toposort(graph)) == expected

    def test_cycle(self):
        # a simple, 2 element cycle
        with pytest.raises(CyclicDependency):
            list(toposort({1: set([2]), 2: set([1])}))

        # an indirect cycle
        with pytest.raises(CyclicDependency):
            list(toposort({1: set([2]), 2: set([3]), 3: set([1])}))

    def test_input_not_modified(self):
        data = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3, 8]),  # includes something self-referential
        }
        orig = data.copy()
        list(toposort(data))
        assert data == orig

    def test_input_not_modified_when_cycle_error(self):
        data = {
            1: set([2]),
            2: set([1]),
            3: set([4]),
        }
        orig = data.copy()
        with pytest.raises(CyclicDependency):
            list(toposort(data))
        assert data == orig
|
||||
|
||||
|
||||
class TestCaseAll(object):
    """Tests for toposort_flatten()."""

    def test_sort_flatten(self):
        data = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3, 8]),  # includes something self-referential
        }
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]
        assert list(toposort(data)) == expected

        # now check the sorted results
        flattened = []
        for group in expected:
            flattened.extend(sorted(group))
        assert toposort_flatten(data) == flattened

        # and the unsorted results. break the results up into groups to compare them
        actual = toposort_flatten(data, False)
        groups = [set(actual[0:3]), set(actual[3:5]), set(actual[5:8])]
        assert groups == expected
|
64
libs/common/rebulk/test/test_validators.py
Normal file
64
libs/common/rebulk/test/test_validators.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name,len-as-condition
|
||||
|
||||
from functools import partial
|
||||
|
||||
from rebulk.pattern import StringPattern
|
||||
|
||||
from ..validators import chars_before, chars_after, chars_surround, validators
|
||||
|
||||
# Characters accepted next to a match by the validators built below.
chars = ' _.'
# Validators with `chars` pre-bound as the first argument; the match is passed when they are called.
left = partial(chars_before, chars)
right = partial(chars_after, chars)
surrounding = partial(chars_surround, chars)
|
||||
|
||||
|
||||
def test_left_chars():
    """Check the `left` validator against inputs with and without an accepted left neighbour."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxx_wordxxx", 1),
        ("wordxxx", 1),
    )
    for text, expected_count in cases:
        matches = list(StringPattern("word", validator=left).matches(text))
        assert len(matches) == expected_count
|
||||
|
||||
|
||||
def test_right_chars():
    """Check the `right` validator against inputs with and without an accepted right neighbour."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxxword.xxx", 1),
        ("xxxword", 1),
    )
    for text, expected_count in cases:
        matches = list(StringPattern("word", validator=right).matches(text))
        assert len(matches) == expected_count
|
||||
|
||||
|
||||
def test_surrounding_chars():
    """Check the `surrounding` validator, which needs both neighbours to be accepted."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        ("word", 1),
    )
    for text, expected_count in cases:
        matches = list(StringPattern("word", validator=surrounding).matches(text))
        assert len(matches) == expected_count
|
||||
|
||||
|
||||
def test_chain():
    """Check a validator built by chaining `left` and `right` with validators()."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        ("word", 1),
    )
    for text, expected_count in cases:
        matches = list(StringPattern("word", validator=validators(left, right)).matches(text))
        assert len(matches) == expected_count
|
84
libs/common/rebulk/toposort.py
Normal file
84
libs/common/rebulk/toposort.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014 True Blade Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Original:
|
||||
# - https://bitbucket.org/ericvsmith/toposort (1.4)
|
||||
# Modifications:
|
||||
# - merged Pull request #2 for CyclicDependency error
|
||||
# - import reduce as original name
|
||||
# - support python 2.6 dict comprehension
|
||||
|
||||
# pylint: skip-file
|
||||
from functools import reduce
|
||||
|
||||
|
||||
class CyclicDependency(ValueError):
    """
    Error raised when the dependency data contains a cycle.

    The offending mapping is kept on the ``cyclic`` attribute.
    """
    def __init__(self, cyclic):
        described = ', '.join([repr(pair) for pair in cyclic.items()])
        message = 'Cyclic dependencies exist among these items: {0}'.format(described)
        super(CyclicDependency, self).__init__(message)
        self.cyclic = cyclic
|
||||
|
||||
|
||||
def toposort(data):
    """
    Yield sets of items in topological order.

    Dependencies are expressed as a dictionary whose keys are items
    and whose values are a set of dependent items. The first set yielded
    consists of items with no dependencies, each subsequent set consists of
    items that depend upon items in the preceding sets.

    Neither the input dictionary nor the sets it contains are modified.

    :param data: mapping of each item to the set of items it depends on
    :type data: dict
    :return: generator yielding one set of items per dependency level
    :rtype: generator
    :raises CyclicDependency: when the remaining items depend on each other in a cycle
    """

    # Special case empty input.
    if len(data) == 0:
        return

    # Copy the input two levels deep so as to leave it unmodified: a shallow
    # dict copy would still share (and mutate) the caller's dependency sets.
    # Self dependencies are ignored while copying.
    data = dict((item, set(dep for dep in deps if dep != item))
                for item, deps in data.items())

    # Find all items that are only mentioned as dependencies.
    extra_items_in_deps = reduce(set.union, data.values()) - set(data.keys())
    # Add empty dependencies where needed.
    data.update(dict((item, set()) for item in extra_items_in_deps))
    while True:
        # Every item with no outstanding dependency belongs to the next level.
        ordered = set(item for item, dep in data.items() if len(dep) == 0)
        if not ordered:
            break
        yield ordered
        data = dict((item, (dep - ordered))
                    for item, dep in data.items()
                    if item not in ordered)
    # Anything left over could not be ordered, which means it is part of a cycle.
    if len(data) != 0:
        raise CyclicDependency(data)
|
||||
|
||||
|
||||
def toposort_flatten(data, sort=True):
    """
    Flatten the output of toposort() into a single list.

    For each set produced by toposort(), its items are appended to the
    result, sorted first when ``sort`` is true so the output is deterministic.

    :param data: dependency mapping, passed to toposort()
    :type data: dict
    :param sort: whether to sort the items of each set before appending them
    :type sort: bool
    :return: Single list of dependencies.
    :rtype: list
    """
    flattened = []
    for group in toposort(data):
        if sort:
            flattened.extend(sorted(group))
        else:
            flattened.extend(list(group))
    return flattened
|
156
libs/common/rebulk/utils.py
Normal file
156
libs/common/rebulk/utils.py
Normal file
|
@ -0,0 +1,156 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Various utility functions
|
||||
"""
|
||||
try:
|
||||
from collections.abc import MutableSet
|
||||
except ImportError:
|
||||
from collections import MutableSet
|
||||
|
||||
from types import GeneratorType
|
||||
|
||||
|
||||
def find_all(string, sub, start=None, end=None, ignore_case=False, **kwargs):
    """
    Return all indices in string s where substring sub is
    found, such that sub is contained in the slice s[start:end].

    Occurrences are searched left to right without overlapping. An empty
    ``sub`` matches at every position of the searched slice.

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'fox'))
    [16]

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'mountain'))
    []

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'The'))
    [0]

    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an'))
    [44, 51, 70]

    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an',
    ... 50,
    ... 60))
    [51]

    >>> list(find_all('abc', ''))
    [0, 1, 2, 3]

    :param string: the input string
    :type string: str
    :param sub: the substring
    :type sub: str
    :param start: index where the search starts (None for the beginning)
    :type start: int
    :param end: index where the search stops (None for the end)
    :type end: int
    :param ignore_case: lower-case both string and sub before searching
    :type ignore_case: bool
    :return: all indices in the input string
    :rtype: __generator[int]
    """
    #pylint: disable=unused-argument
    if ignore_case:
        sub = sub.lower()
        string = string.lower()
    # Always move forward by at least one character: with an empty `sub`,
    # str.find() returns `start` itself and the loop would never terminate.
    step = len(sub) or 1
    while True:
        start = string.find(sub, start, end)
        if start == -1:
            return
        yield start
        start += step
|
||||
|
||||
|
||||
def get_first_defined(data, keys, default_value=None):
    """
    Return the value of the first key of ``keys`` that is present in ``data``.

    :param data: container to look the keys up in
    :type data: dict
    :param keys: candidate keys, tried in order
    :type keys: iterable
    :param default_value: value returned when none of the keys is present
    :type default_value:
    :return: the value stored under the first key found, or default_value
    :rtype:
    """
    found_values = (data[key] for key in keys if key in data)
    return next(found_values, default_value)
|
||||
|
||||
|
||||
def is_iterable(obj):
    """
    Tell whether ``obj`` should be handled as a collection of values.

    Generators always qualify. Other objects qualify when they expose an
    `__iter__` attribute, which covers types this module does not have to
    know about, such as NumPy arrays.

    Strings are the exception: they are treated as atomic values, not as
    iterables.

    We don't need to check for the Python 2 `unicode` type, because it doesn't
    have an `__iter__` attribute anyway.
    """
    if isinstance(obj, GeneratorType):
        return True
    if isinstance(obj, str):
        return False
    return hasattr(obj, '__iter__')
|
||||
|
||||
|
||||
def extend_safe(target, source):
    """
    Append to target, in place, each element of source that target does not already contain.

    :param target: list that receives the missing elements
    :type target: list
    :param source: elements to add
    :type source: list
    """
    for candidate in source:
        if candidate in target:
            continue
        target.append(candidate)
|
||||
|
||||
|
||||
class _Ref(object):
    """
    Reference for IdentitySet

    Wraps a value so that equality and hashing are based on the identity of
    the wrapped object instead of its own ``__eq__``/``__hash__``.
    """
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Comparing with a foreign object (possible on a hash collision) must
        # not raise AttributeError; let Python fall back to its default.
        if not isinstance(other, _Ref):
            return NotImplemented
        return self.value is other.value

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return id(self.value)
|
||||
|
||||
|
||||
class IdentitySet(MutableSet):  # pragma: no cover
    """
    Mutable set that tells its elements apart by object identity.

    Every element is stored wrapped in a _Ref inside ``self.refs``.
    """
    def __init__(self, items=None):  # pylint: disable=super-init-not-called
        initial = [] if items is None else items
        self.refs = set(_Ref(item) for item in initial)

    def __contains__(self, elem):
        wrapped = _Ref(elem)
        return wrapped in self.refs

    def __iter__(self):
        return (ref.value for ref in self.refs)

    def __len__(self):
        return len(self.refs)

    def add(self, value):
        wrapped = _Ref(value)
        self.refs.add(wrapped)

    def discard(self, value):
        wrapped = _Ref(value)
        self.refs.discard(wrapped)

    def update(self, iterable):
        """
        Add every element of iterable to the set.

        :param iterable: elements to add
        :type iterable:
        :return:
        :rtype:
        """
        for elem in iterable:
            self.add(elem)

    def __repr__(self):  # pragma: no cover
        class_name = type(self).__name__
        return "%s(%s)" % (class_name, list(self))
|
70
libs/common/rebulk/validators.py
Normal file
70
libs/common/rebulk/validators.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Validator functions to use in patterns.
|
||||
|
||||
All these functions take the match as their last argument, so functools.partial can be used to bind the preceding arguments.
|
||||
"""
|
||||
|
||||
|
||||
def chars_before(chars, match):
    """
    Check that the character immediately left of the match is one of ``chars``.

    A match whose ``start`` is at (or before) index 0 has no left neighbour and
    is accepted.

    :param chars: accepted characters
    :type chars:
    :param match: object exposing ``start`` and ``input_string``
    :type match:
    :return: True when the match is accepted
    :rtype: bool
    """
    return match.start <= 0 or match.input_string[match.start - 1] in chars
|
||||
|
||||
|
||||
def chars_after(chars, match):
    """
    Check that the character immediately right of the match is one of ``chars``.

    A match whose ``end`` reaches (or passes) the end of the input string has
    no right neighbour and is accepted.

    :param chars: accepted characters
    :type chars:
    :param match: object exposing ``end`` and ``input_string``
    :type match:
    :return: True when the match is accepted
    :rtype: bool
    """
    return match.end >= len(match.input_string) or match.input_string[match.end] in chars
|
||||
|
||||
|
||||
def chars_surround(chars, match):
    """
    Check that the characters on both sides of the match are in ``chars``.

    The left side is checked with ``chars_before`` first; ``chars_after`` is
    only consulted when the left side is accepted.

    :param chars: accepted characters
    :type chars:
    :param match: match to validate
    :type match:
    :return: True when both sides are accepted
    :rtype: bool
    """
    if not chars_before(chars, match):
        return False
    return chars_after(chars, match)
|
||||
|
||||
|
||||
def validators(*chained_validators):
    """
    Combine several validator functions into a single validator.

    The returned callable runs the given validators in order on a match and
    stops at the first one returning a falsy value.

    :param chained_validators: validator callables, each taking a match
    :type chained_validators:
    :return: callable taking a match and returning True only if all validators pass
    :rtype:
    """
    def validator_chain(match):  # pylint:disable=missing-docstring
        # all() short-circuits on the first falsy result, like an explicit loop.
        return all(check(match) for check in chained_validators)

    return validator_chain
|
Loading…
Add table
Add a link
Reference in a new issue