Move common libs to libs/common

This commit is contained in:
Labrys of Knossos 2018-12-16 13:30:24 -05:00
commit 1f4bd41bcc
1612 changed files with 962 additions and 10 deletions

View file

@ -0,0 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring

View file

@ -0,0 +1,79 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
from ..match import Match
from ..rules import Rule, RemoveMatch, AppendMatch, RenameMatch, AppendTags, RemoveTags
class RuleRemove0(Rule):
    """Fixture rule: bare ``RemoveMatch`` class as consequence, single match as response."""

    consequence = RemoveMatch  # the class itself, not an instance

    def when(self, matches, context):
        """Respond with the first element of ``matches``; ``context`` is not used."""
        first_match = matches[0]
        return first_match
class RuleAppend0(Rule):
    """Fixture rule: ``AppendMatch()`` instance as consequence, single match as response."""

    consequence = AppendMatch()

    def when(self, matches, context):
        """Respond with a new ``Match(5, 10)``; neither argument is used."""
        created = Match(5, 10)
        return created
class RuleRename0(Rule):
    """Fixture rule: list holding one ``RenameMatch('renamed')``, list response."""

    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        """Respond with a one-element list holding a match named "original"."""
        created = Match(5, 10, name="original")
        return [created]
class RuleRemove1(Rule):
    """Fixture rule: list holding one ``RemoveMatch()`` instance, list response."""

    consequence = [RemoveMatch()]

    def when(self, matches, context):
        """Respond with a one-element list holding the first element of ``matches``."""
        first_match = matches[0]
        return [first_match]
class RuleAppend1(Rule):
    """Fixture rule: list holding the bare ``AppendMatch`` class, list response."""

    consequence = [AppendMatch]  # the class itself, wrapped in a list

    def when(self, matches, context):
        """Respond with a one-element list holding a new ``Match(5, 10)``."""
        created = Match(5, 10)
        return [created]
class RuleRename1(Rule):
    """Fixture rule: single ``RenameMatch('renamed')`` consequence, list response."""

    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        """Respond with a one-element list holding a match named "original"."""
        created = Match(5, 10, name="original")
        return [created]
class RuleAppend2(Rule):
    """Fixture rule: list holding ``AppendMatch('renamed')`` plus declared properties."""

    consequence = [AppendMatch('renamed')]
    # Declared properties for this rule: one name mapped to a list holding None.
    properties = {'renamed': [None]}

    def when(self, matches, context):
        """Respond with a one-element list holding a new ``Match(5, 10)``."""
        created = Match(5, 10)
        return [created]
class RuleRename2(Rule):
    """Fixture rule: single ``RenameMatch('renamed')`` consequence, single-match response."""

    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        """Respond with one match named "original" (not wrapped in a list)."""
        created = Match(5, 10, name="original")
        return created
class RuleAppend3(Rule):
    """Fixture rule: single ``AppendMatch('renamed')`` consequence plus declared properties."""

    consequence = AppendMatch('renamed')
    # Declared properties for this rule: one name mapped to a list holding None.
    properties = {'renamed': [None]}

    def when(self, matches, context):
        """Respond with a one-element list holding a new ``Match(5, 10)``."""
        created = Match(5, 10)
        return [created]
class RuleRename3(Rule):
    """Fixture rule: list holding ``RenameMatch('renamed')``, single-match response."""

    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        """Respond with one match named "original" (not wrapped in a list)."""
        created = Match(5, 10, name="original")
        return created
class RuleAppendTags0(Rule):
    """Fixture rule: ``AppendTags(['new-tag'])`` applied to ``named('tags', 0)``."""

    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        """Respond with whatever ``matches.named('tags', 0)`` yields."""
        selected = matches.named('tags', 0)
        return selected
class RuleRemoveTags0(Rule):
    """Fixture rule: ``RemoveTags(['new-tag'])`` applied to ``named('tags', 0)``."""

    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        """Respond with whatever ``matches.named('tags', 0)`` yields."""
        selected = matches.named('tags', 0)
        return selected
class RuleAppendTags1(Rule):
    """Fixture rule: ``AppendTags(['new-tag'])`` applied to ``named('tags')``."""

    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        """Respond with whatever ``matches.named('tags')`` yields."""
        selected = matches.named('tags')
        return selected
class RuleRemoveTags1(Rule):
    """Fixture rule: ``RemoveTags(['new-tag'])`` applied to ``named('tags')``."""

    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        """Respond with whatever ``matches.named('tags')`` yields."""
        selected = matches.named('tags')
        return selected

View file

@ -0,0 +1,38 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
from rebulk.rules import Rule, RemoveMatch, CustomRule
class RemoveAllButLastYear(Rule):
    """Rule whose response is every 'year' match except the last one."""

    consequence = RemoveMatch

    def when(self, matches, context):
        """Return the matches named 'year' with the final entry sliced off."""
        return matches.named('year')[:-1]
class PrefixedSuffixedYear(CustomRule):
    """Custom rule removing 'year' matches that have neither a yearPrefix nor a yearSuffix."""

    def when(self, matches, context):
        """Collect 'year' matches lacking both a 'yearPrefix' before and a 'yearSuffix' after.

        The suffix lookup only runs when the prefix lookup returned something falsy.
        """
        return [
            candidate
            for candidate in matches.named('year')
            if not (matches.previous(candidate, lambda p: p.name == 'yearPrefix')
                    or matches.next(candidate, lambda n: n.name == 'yearSuffix'))
        ]

    def then(self, matches, when_response, context):
        """Remove every match listed in ``when_response`` from ``matches``."""
        for orphan in when_response:
            matches.remove(orphan)
class PrefixedSuffixedYearNoLambda(Rule):
    """Same selection as the lambda-based variant, filtering neighbours by hand."""

    consequence = RemoveMatch

    def when(self, matches, context):
        """Collect 'year' matches with no 'yearPrefix' before and no 'yearSuffix' after."""
        orphans = []
        for candidate in matches.named('year'):
            # A prefix neighbour keeps the year; the suffix side is then not inspected.
            if [m for m in matches.previous(candidate) if m.name == 'yearPrefix']:
                continue
            if [m for m in matches.next(candidate) if m.name == 'yearSuffix']:
                continue
            orphans.append(candidate)
        return orphans

View file

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
from ..match import Match
from ..rules import Rule
class Rule3(Rule):
    """Fixture rule whose response is read from the context under the key 'when'."""

    def when(self, matches, context):
        """Return ``context.get('when')``."""
        response = context.get('when')
        return response

    def then(self, matches, when_response, context):
        """Check that the response is True or False, then append ``Match(3, 4)``."""
        assert when_response in (True, False)
        matches.append(Match(3, 4))
class Rule2(Rule):
    """Fixture rule declaring ``Rule3`` as its dependency; always responds True."""

    dependency = Rule3

    def when(self, matches, context):
        """Always respond True."""
        return True

    def then(self, matches, when_response, context):
        """Check that the response is truthy, then append ``Match(3, 4)``."""
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
class Rule1(Rule):
    """Fixture rule declaring ``Rule2`` as its dependency; empties the matches."""

    dependency = Rule2

    def when(self, matches, context):
        """Always respond True."""
        return True

    def then(self, matches, when_response, context):
        """Check that the response is truthy, then call ``matches.clear()``."""
        assert when_response
        matches.clear()
class Rule0(Rule):
    """Fixture rule declaring ``Rule1`` as its dependency; always responds True."""

    dependency = Rule1

    def when(self, matches, context):
        """Always respond True."""
        return True

    def then(self, matches, when_response, context):
        """Check that the response is truthy, then append ``Match(3, 4)``."""
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
class Rule1Disabled(Rule1):
    """Variant of ``Rule1`` whose ``enabled`` hook always answers False."""

    name = "Disabled Rule1"

    def enabled(self, context):
        """Report the rule as disabled regardless of ``context``."""
        return False

View file

@ -0,0 +1,411 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re
from functools import partial
from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close():
    """Closing an empty chain hands back the Rebulk and leaves one effective pattern."""
    rebulk = Rebulk()
    closed = rebulk.chain().close()
    assert closed == rebulk
    effective = rebulk.effective_patterns()
    assert len(effective) == 1
def test_build_chain():
    """Build a five-part chain and check each part's pattern type and repeater bounds."""
    rebulk = Rebulk()

    def digit(input_string):
        """Return the (start, end) span of the first "1849", or None when absent."""
        needle = "1849"
        start = input_string.find(needle)
        if start < 0:
            return None
        return start, start + len(needle)

    ret = (
        rebulk.chain()
        .functional(digit)
        .string("test").repeater(2)
        .string("x").repeater('{1,3}')
        .string("optional").repeater('?')
        .regex("f?x").repeater('+')
        .close()
    )
    assert ret == rebulk
    assert len(rebulk.effective_patterns()) == 1

    chain = rebulk.effective_patterns()[0]
    # (pattern class, repeater_start, repeater_end); None means the end must be None.
    expected_parts = (
        (FunctionalPattern, 1, 1),
        (StringPattern, 2, 2),
        (StringPattern, 1, 3),
        (StringPattern, 0, 1),
        (RePattern, 1, None),
    )
    assert len(chain.parts) == 5
    for index, (pattern_type, low, high) in enumerate(expected_parts):
        part = chain.parts[index]
        assert isinstance(part.pattern, pattern_type)
        assert part.repeater_start == low
        if high is None:
            assert part.repeater_end is None
        else:
            assert part.repeater_end == high
def test_chain_defaults():
rebulk = Rebulk()
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
rebulk.chain()\
.regex("(?P<test>test)") \
.regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)")
matches = rebulk.matches("test testIgnore")
assert len(matches) == 1
assert matches[0].name == "test"
def test_matches():
    """Run a closed chain over several inputs and check match counts and child values."""
    rebulk = Rebulk()

    def digit(input_string):
        """Return the (start, end) span of the first "1849", or None when absent."""
        needle = "1849"
        start = input_string.find(needle)
        if start < 0:
            return None
        return start, start + len(needle)

    chain = (
        rebulk.chain()
        .functional(digit)
        .string("test").hidden().repeater(2)
        .string("x").hidden().repeater('{1,3}')
        .string("optional").hidden().repeater('?')
        .regex("f.?x", name='result').repeater('+')
        .close()
    )

    def assert_children(parent, expected_values):
        """Compare the leading children of ``parent`` with ``expected_values`` by index."""
        children = parent.children
        for index, expected in enumerate(expected_values):
            assert children[index].value == expected

    found = chain.matches("1849testtestxxfixfux_foxabc1849testtestxoptionalfoxabc")
    assert len(found) == 2
    assert_children(found[0], ('1849', 'fix', 'fux'))
    assert_children(found[1], ('1849', 'fox'))

    # Inputs the chain is expected to reject entirely.
    rejected_inputs = (
        "_1850testtestxoptionalfoxabc",
        "_1849testtesttesttestxoptionalfoxabc",
        "_1849testtestxxxxoptionalfoxabc",
        "_1849testtestoptionalfoxabc",
        "_1849testtestxoptionalabc",
    )
    for rejected in rejected_inputs:
        assert len(chain.matches(rejected)) == 0

    found = chain.matches("_1849testtestxoptionalfaxabc")
    assert len(found) == 1
    assert_children(found[0], ('1849', 'fax'))
def test_matches_2():
    """Episode/version chain with int formatters: check names and values of five matches."""
    rebulk = (
        Rebulk()
        .regex_defaults(flags=re.IGNORECASE)
        .chain(children=True, formatter={'episode': int})
        .defaults(formatter={'version': int})
        .regex(r'e(?P<episode>\d{1,4})')
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*')
        .close()
    )
    found = rebulk.matches("This is E14v2-15E16x17")
    expected = (
        ('episode', 14),
        ('version', 2),
        ('episode', 15),
        ('episode', 16),
        ('episode', 17),
    )
    assert len(found) == 5
    for index, (name, value) in enumerate(expected):
        assert found[index].name == name
        assert found[index].value == value
def test_matches_3():
    """Three season/episode chains sharing options: check names and values per input."""
    alt_dash = (r'@', r'[\W_]')  # abbreviation

    def conflict_solver(match, other):
        """Keep season/episode matches over the listed other names, else '__default__'."""
        if match.name in ['season', 'episode'] and other.name in \
                ['screen_size', 'video_codec', 'audio_codec',
                 'audio_channels', 'container', 'date']:
            return match
        return '__default__'

    rebulk = Rebulk()
    (
        rebulk.chain(formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     private_names=['episodeSeparator', 'seasonSeparator'],
                     children=True,
                     private_parent=True,
                     conflict_solver=conflict_solver)
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)')
        .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*')
        .chain()
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)')
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*')
        .chain()
        .regex(r'S(?P<season>\d+)')
        .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
    )

    def check(input_string, expected):
        """Match ``input_string`` and compare count, names and values with ``expected``."""
        found = rebulk.matches(input_string)
        assert len(found) == len(expected)
        for index, (name, value) in enumerate(expected):
            assert found[index].name == name
            assert found[index].value == value

    check("test-01x02-03", [('season', 1), ('episode', 2), ('episode', 3)])
    check("test-S01E02-03", [('season', 1), ('episode', 2), ('episode', 3)])
    check("test-S01-02-03-04", [('season', 1), ('season', 2), ('season', 3), ('season', 4)])
def test_matches_4():
    """Chain with the validator reset to None: three episode values are expected."""
    seps_surround = partial(chars_surround, " ")
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
    (
        rebulk.chain(formatter={'episode': int, 'version': int})
        .defaults(validator=None)
        .regex(r'e(?P<episode>\d{1,4})')
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
    )
    found = rebulk.matches("Some Series E01E02E03")
    assert len(found) == 3
    for index, expected in enumerate((1, 2, 3)):
        assert found[index].value == expected
def test_matches_5():
    """With a parent validator and a '{2,3}' repeater, check match counts per input."""
    seps_surround = partial(chars_surround, " ")
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
    (
        rebulk.chain(formatter={'episode': int, 'version': int})
        .defaults(validator=None)
        .regex(r'e(?P<episode>\d{1,4})')
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
    )
    expected_counts = (
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        # Parent can't be validated, so no results at all
        ("Some Series E01E02E03E04E05E06", 0),
    )
    for input_string, count in expected_counts:
        assert len(rebulk.matches(input_string)) == count
def test_matches_6():
    """Without any validator and a '{2,3}' repeater, check match counts per input."""
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator=None, children=True, private_parent=True)
    (
        rebulk.chain(formatter={'episode': int, 'version': int})
        .defaults(validator=None)
        .regex(r'e(?P<episode>\d{1,4})')
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
    )
    expected_counts = (
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        # No validator on parent, so it should give 4 episodes.
        ("Some Series E01E02E03E04E05E06", 4),
    )
    for input_string, count in expected_counts:
        assert len(rebulk.matches(input_string)) == count
def test_matches_7():
    """Season chain with separator validators: check how many matches each input yields."""
    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, private_parent=True)
    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')
    # NOTE(review): every ``matches[i].value = N`` line in this test is an assignment, not an
    # assertion, so only the ``len(matches)`` checks verify anything. No int formatter is
    # configured here, so the values are presumably strings -- confirm the expected values
    # before turning these into ``assert ... ==`` checks.
    matches = rebulk.matches("Some S01")
    assert len(matches) == 1
    matches[0].value = 1
    matches = rebulk.matches("Some S01-02")
    assert len(matches) == 2
    matches[0].value = 1
    matches[1].value = 2
    matches = rebulk.matches("programs4/Some S01-02")
    assert len(matches) == 2
    matches[0].value = 1
    matches[1].value = 2
    matches = rebulk.matches("programs4/SomeS01middle.S02-03.andS04here")
    assert len(matches) == 2
    matches[0].value = 2
    matches[1].value = 3
    matches = rebulk.matches("Some 02.and.S04-05.here")
    assert len(matches) == 2
    matches[0].value = 4
    matches[1].value = 5
def test_chain_breaker():
    """A ``chain_breaker`` passed to ``chain()`` should stop the chain at a season jump > 10.

    For "Some S01-02-03-50-51" the test expects three season matches with values 1, 2, 3.
    """
    def chain_breaker(matches):
        """Return True when the last two 'season' values differ by more than 10."""
        seasons = matches.named('season')
        return len(seasons) > 1 and seasons[-1].value - seasons[-2].value > 10

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})
    rebulk.chain(chain_breaker=chain_breaker). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')
    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # Compare rather than assign: ``matches[i].value = n`` would overwrite the value and
    # verify nothing. The int formatter above makes the season values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
def test_chain_breaker_defaults():
    """A ``chain_breaker`` given through ``defaults()`` should stop the chain the same way.

    For "Some S01-02-03-50-51" the test expects three season matches with values 1, 2, 3.
    """
    def chain_breaker(matches):
        """Return True when the last two 'season' values differ by more than 10."""
        seasons = matches.named('season')
        return len(seasons) > 1 and seasons[-1].value - seasons[-2].value > 10

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(chain_breaker=chain_breaker, children=True, private_parent=True, formatter={'season': int})
    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')
    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # Compare rather than assign: ``matches[i].value = n`` would overwrite the value and
    # verify nothing. The int formatter above makes the season values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
def test_chain_breaker_defaults2():
    """A ``chain_breaker`` given through ``chain_defaults()`` should stop the chain too.

    For "Some S01-02-03-50-51" the test expects three season matches with values 1, 2, 3.
    """
    def chain_breaker(matches):
        """Return True when the last two 'season' values differ by more than 10."""
        seasons = matches.named('season')
        return len(seasons) > 1 and seasons[-1].value - seasons[-2].value > 10

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.chain_defaults(chain_breaker=chain_breaker)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})
    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')
    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # Compare rather than assign: ``matches[i].value = n`` would overwrite the value and
    # verify nothing. The int formatter above makes the season values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3

View file

@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0
class TestDebug(object):
    #request.addfinalizer(disable_debug)
    debug.DEBUG = True  # switched on while the class-level fixtures below are created
    pattern = StringPattern(1, 3, value="es")
    match = Match(1, 3, value="es")
    rule = RuleRemove0()
    input_string = "This is a debug test"
    rebulk = Rebulk().string("debug") \
        .string("is")
    matches = rebulk.matches(input_string)
    debug.DEBUG = False  # switched back off once the fixtures exist
    # NOTE(review): the tests below assert hard-coded ``defined_at`` line numbers (20, 22, 23,
    # 26-28) and the module name 'rebulk.test.test_debug' for the statements above, so they
    # presumably depend on the exact line layout of this file -- confirm before adding or
    # removing lines above this comment.

    @classmethod
    def setup_class(cls):
        """Set ``debug.DEBUG`` to True (class-level setup hook)."""
        debug.DEBUG = True

    @classmethod
    def teardown_class(cls):
        """Set ``debug.DEBUG`` back to False (class-level teardown hook)."""
        debug.DEBUG = False

    def test_pattern(self):
        """Check ``defined_at`` (line, module name, file name) and ``repr`` of the pattern."""
        assert self.pattern.defined_at.lineno == 20
        assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
        assert self.pattern.defined_at.filename.endswith('test_debug.py')
        assert str(self.pattern.defined_at) == 'test_debug.py#L20'
        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'

    def test_match(self):
        """Check ``defined_at`` (line, module name, file name) of the match fixture."""
        assert self.match.defined_at.lineno == 22
        assert self.match.defined_at.name == 'rebulk.test.test_debug'
        assert self.match.defined_at.filename.endswith('test_debug.py')
        assert str(self.match.defined_at) == 'test_debug.py#L22'

    def test_rule(self):
        """Check ``defined_at`` (line, module name, file name) and ``repr`` of the rule."""
        assert self.rule.defined_at.lineno == 23
        assert self.rule.defined_at.name == 'rebulk.test.test_debug'
        assert self.rule.defined_at.filename.endswith('test_debug.py')
        assert str(self.rule.defined_at) == 'test_debug.py#L23'
        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'

    def test_rebulk(self):
        """
        This test fails on travis CI, can't find out why there's 1 line offset ...

        Two adjacent line numbers are therefore accepted for each pattern.
        """
        assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
        assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
        assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
        assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
        assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
        assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
        # Each match is expected to carry the ``defined_at`` of the pattern at the same index.
        assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
        assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

    def test_repr(self):
        """Smoke test: ``str()`` of the matches must not raise; the result is discarded."""
        str(self.matches)

View file

@ -0,0 +1,138 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Introspector tests
"""
# pylint: disable=no-self-use,pointless-statement,missing-docstring,protected-access,invalid-name,len-as-condition
from ..rebulk import Rebulk
from .. import introspector
from .default_rules_module import RuleAppend2, RuleAppend3
def test_string_introspector():
    """Introspect two named string patterns and check the reported property maps."""
    rebulk = Rebulk().string('One', 'Two', 'Three', name='first').string('1', '2', '3', name='second')
    introspected = introspector.introspect(rebulk, None)
    assert len(introspected.patterns) == 2
    first_properties = introspected.patterns[0].properties
    assert len(first_properties) == 1
    # NOTE(review): bare comparison, not an assertion -- the ``==`` result is discarded, so
    # this line only fails if the key lookup raises. Confirm whether ``assert`` was intended.
    first_properties['first'] == ['One', 'Two', 'Three']
    second_properties = introspected.patterns[1].properties
    assert len(second_properties) == 1
    # NOTE(review): same as above -- comparison result is discarded.
    second_properties['second'] == ['1', '2', '3']
    properties = introspected.properties
    assert len(properties) == 2
    assert properties['first'] == first_properties['first']
    assert properties['second'] == second_properties['second']
def test_string_properties():
    """Introspect string patterns declaring explicit ``properties`` and check the merge."""
    rebulk = Rebulk()\
        .string('One', 'Two', 'Three', name='first', properties={'custom': ['One']})\
        .string('1', '2', '3', name='second', properties={'custom': [1]})
    introspected = introspector.introspect(rebulk, None)
    assert len(introspected.patterns) == 2
    assert len(introspected.rules) == 2
    first_properties = introspected.patterns[0].properties
    assert len(first_properties) == 1
    # NOTE(review): bare comparison, not an assertion -- the ``==`` result is discarded, so
    # this line only fails if the key lookup raises. Confirm whether ``assert`` was intended.
    first_properties['custom'] == ['One']
    second_properties = introspected.patterns[1].properties
    assert len(second_properties) == 1
    # NOTE(review): same as above -- comparison result is discarded.
    second_properties['custom'] == [1]
    properties = introspected.properties
    assert len(properties) == 1
    # The combined map is expected to hold both patterns' 'custom' values, in order.
    assert properties['custom'] == ['One', 1]
def test_various_pattern():
    """Introspect a mix of regex, string and functional patterns and check their properties.

    NOTE(review): every bare ``x[key] == [...]`` statement below discards its result; it
    only fails if the key lookup raises. Confirm the expected values before adding
    ``assert`` to any of them.
    """
    rebulk = Rebulk()\
        .regex('One', 'Two', 'Three', name='first', value="string") \
        .string('1', '2', '3', name='second', value="digit") \
        .string('4', '5', '6', name='third') \
        .string('private', private=True) \
        .functional(lambda string: (0, 5), name='func', value='test') \
        .regex('One', 'Two', 'Three', name='regex_name') \
        .regex('(?P<one>One)(?P<two>Two)(?P<three>Three)') \
        .functional(lambda string: (6, 10), name='func2') \
        .string('7', name='third')
    introspected = introspector.introspect(rebulk, None)
    # Nine patterns are registered above, but only eight are expected to be introspected.
    assert len(introspected.patterns) == 8
    assert len(introspected.rules) == 2
    first_properties = introspected.patterns[0].properties
    assert len(first_properties) == 1
    first_properties['first'] == ['string']
    second_properties = introspected.patterns[1].properties
    assert len(second_properties) == 1
    second_properties['second'] == ['digit']
    third_properties = introspected.patterns[2].properties
    assert len(third_properties) == 1
    third_properties['third'] == ['4', '5', '6']
    func_properties = introspected.patterns[3].properties
    assert len(func_properties) == 1
    func_properties['func'] == ['test']
    regex_name_properties = introspected.patterns[4].properties
    assert len(regex_name_properties) == 1
    regex_name_properties['regex_name'] == [None]
    regex_groups_properties = introspected.patterns[5].properties
    assert len(regex_groups_properties) == 3
    regex_groups_properties['one'] == [None]
    regex_groups_properties['two'] == [None]
    regex_groups_properties['three'] == [None]
    func2_properties = introspected.patterns[6].properties
    assert len(func2_properties) == 1
    func2_properties['func2'] == [None]
    append_third_properties = introspected.patterns[7].properties
    assert len(append_third_properties) == 1
    append_third_properties['third'] == [None]
    properties = introspected.properties
    assert len(properties) == 9
    assert properties['first'] == first_properties['first']
    assert properties['second'] == second_properties['second']
    # 'third' is declared by two patterns; the combined entry is their concatenation.
    assert properties['third'] == third_properties['third'] + append_third_properties['third']
    assert properties['func'] == func_properties['func']
    assert properties['regex_name'] == regex_name_properties['regex_name']
    assert properties['one'] == regex_groups_properties['one']
    assert properties['two'] == regex_groups_properties['two']
    assert properties['three'] == regex_groups_properties['three']
    assert properties['func2'] == func2_properties['func2']
def test_rule_properties():
    """Introspect two rules declaring ``properties`` and check per-rule and merged maps."""
    rebulk = Rebulk(default_rules=False).rules(RuleAppend2, RuleAppend3)
    introspected = introspector.introspect(rebulk, None)
    assert len(introspected.rules) == 2
    assert len(introspected.patterns) == 0

    # Both rules are expected to expose the same single-entry property map.
    for rule_index in (0, 1):
        rule_properties = introspected.rules[rule_index].properties
        assert len(rule_properties) == 1
        assert rule_properties['renamed'] == [None]

    merged = introspected.properties
    assert len(merged) == 1
    assert merged['renamed'] == [None]

View file

@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
from ..loose import call
def test_loose_function():
    """``call`` on a plain function is expected to equal a direct call without the surplus."""
    def func(v1, v2, v3=3, v4=4):
        return sum((v1, v2, v3, v4))

    # (args for call, kwargs for call, args for direct call, kwargs for direct call)
    cases = (
        ((1, 2), {}, (1, 2), {}),
        ((1, 2, 3, 5), {}, (1, 2, 3, 5), {}),
        ((1, 2), {'v3': 4, 'v4': 5}, (1, 2), {'v3': 4, 'v4': 5}),
        ((1, 2, 3, 4, 5), {}, (1, 2, 3, 4), {}),
        ((1, 2, 3, 4), {'more': 5}, (1, 2, 3, 4), {}),
    )
    for loose_args, loose_kwargs, strict_args, strict_kwargs in cases:
        assert call(func, *loose_args, **loose_kwargs) == func(*strict_args, **strict_kwargs)
def test_loose_varargs_function():
    """``call`` on a ``*args`` function is expected to equal the matching direct call."""
    def func(v1, v2, *args):
        """Sum v1, v2 and the first two extra arguments, defaulting them to 3 and 4."""
        # Resolve each default separately. A single chained conditional expression binds
        # as ``(v1 + v2 + args[0]) if ... else (...)`` and would drop v1 and v2 whenever
        # a default applies.
        v3 = args[0] if len(args) > 0 else 3
        v4 = args[1] if len(args) > 1 else 4
        return v1 + v2 + v3 + v4

    assert call(func, 1, 2) == func(1, 2)
    assert call(func, 1, 2, 3, 5) == func(1, 2, 3, 5)
    assert call(func, 1, 2, 3, 4, 5) == func(1, 2, 3, 4)
def test_loose_kwargs_function():
    """``call`` on a ``**kwargs`` function is expected to equal the matching direct call."""
    def func(v1, v2, **kwargs):
        v3 = kwargs.get('v3', 3)
        v4 = kwargs.get('v4', 4)
        return v1 + v2 + v3 + v4

    for keywords in ({'v1': 1, 'v2': 2}, {'v1': 1, 'v2': 2, 'v3': 3, 'v4': 5}):
        assert call(func, **keywords) == func(**keywords)
def test_loose_class():
    """``call`` on a class is expected to build the same object as a direct construction."""
    class Dummy(object):
        def __init__(self, v1, v2, v3=3, v4=4):
            self.v1, self.v2 = v1, v2
            self.v3, self.v4 = v3, v4

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    # (args for call, kwargs for call, args for direct call, kwargs for direct call)
    cases = (
        ((1, 2), {}, (1, 2), {}),
        ((1, 2, 3, 5), {}, (1, 2, 3, 5), {}),
        ((1, 2), {'v3': 4, 'v4': 5}, (1, 2), {'v3': 4, 'v4': 5}),
        ((1, 2, 3, 4, 5), {}, (1, 2, 3, 4), {}),
        ((1, 2, 3, 4), {'more': 5}, (1, 2, 3, 4), {}),
    )
    for loose_args, loose_kwargs, strict_args, strict_kwargs in cases:
        loose = call(Dummy, *loose_args, **loose_kwargs)
        strict = Dummy(*strict_args, **strict_kwargs)
        assert loose.call() == strict.call()
def test_loose_varargs_class():
    """``call`` on a class with a ``*args`` constructor is expected to equal direct use."""
    class Dummy(object):
        def __init__(self, v1, v2, *args):
            self.v1, self.v2 = v1, v2
            # Extra positional arguments override the defaults 3 and 4, in order.
            self.v3 = args[0] if args else 3
            self.v4 = args[1] if len(args) > 1 else 4

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    # (args for call, args for direct construction)
    cases = (
        ((1, 2), (1, 2)),
        ((1, 2, 3, 5), (1, 2, 3, 5)),
        ((1, 2, 3, 4, 5), (1, 2, 3, 4)),
    )
    for loose_args, strict_args in cases:
        assert call(Dummy, *loose_args).call() == Dummy(*strict_args).call()
def test_loose_kwargs_class():
    """``call`` on a class with a ``**kwargs`` constructor is expected to equal direct use."""
    class Dummy(object):
        def __init__(self, v1, v2, **kwargs):
            self.v1, self.v2 = v1, v2
            self.v3 = kwargs.get('v3', 3)
            self.v4 = kwargs.get('v4', 4)

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    for keywords in ({'v1': 1, 'v2': 2}, {'v1': 1, 'v2': 2, 'v3': 3, 'v4': 5}):
        assert call(Dummy, **keywords).call() == Dummy(**keywords).call()

View file

@ -0,0 +1,568 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition
import pytest
import six
from ..match import Match, Matches
from ..pattern import StringPattern, RePattern
from ..formatters import formatters
class TestMatchClass(object):
    """Tests for single ``Match`` objects: repr, names, equality, ordering, length, value."""

    def test_repr(self):
        """``repr`` shows value and span, plus private/name/tags markers when set."""
        plain = Match(1, 3, value="es")
        assert repr(plain) == '<es:(1, 3)>'
        decorated = Match(0, 4, value="test", private=True, name="abc", tags=['one', 'two'])
        assert repr(decorated) == '<test:(0, 4)+private+name=abc+tags=[\'one\', \'two\']>'

    def test_names(self):
        """``names`` of a parent is expected to list the names of its children."""
        parent = Match(0, 10, name="test")
        for child_name in ("child1", "child2"):
            parent.children.append(Match(0, 10, name=child_name, parent=parent))
        assert set(parent.names) == {"child1", "child2"}

    def test_equality(self):
        """Matches built from the same arguments compare and hash equal."""
        left = Match(1, 3, value="es")
        right = Match(1, 3, value="es")
        stranger = object()
        assert hash(left) == hash(right)
        assert hash(left) != hash(stranger)
        assert left == right
        assert not left == stranger

    def test_inequality(self):
        """Matches differing in span or value compare and hash unequal."""
        base = Match(0, 2, value="te")
        other_span = Match(2, 4, value="st")
        other_value = Match(0, 2, value="other")
        stranger = object()
        assert hash(base) != hash(other_span)
        assert hash(base) != hash(other_value)
        assert base != stranger
        assert base != other_span
        assert base != other_value

    def test_length(self):
        """``len`` follows the span, not the value."""
        four_wide = Match(0, 4, value="test")
        two_wide = Match(0, 2, value="spanIsUsed")
        assert len(four_wide) == 4
        assert len(two_wide) == 2

    def test_compare(self):
        """Ordering between matches, and against a foreign object on Python 2 and 3."""
        earlier = Match(0, 2, value="te")
        later = Match(2, 4, value="st")
        stranger = object()
        assert earlier < later
        assert earlier <= later
        assert later > earlier
        assert later >= earlier
        if not six.PY3:
            # Python 2 orders arbitrary objects instead of raising.
            assert earlier < stranger
            assert earlier <= stranger
            assert not earlier > stranger
            assert not earlier >= stranger
        else:
            with pytest.raises(TypeError):
                earlier < stranger
            with pytest.raises(TypeError):
                earlier <= stranger
            with pytest.raises(TypeError):
                earlier > stranger
            with pytest.raises(TypeError):
                earlier >= stranger

    def test_value(self):
        """An assigned ``value`` is read back unchanged."""
        target = Match(1, 3)
        target.value = "test"
        assert target.value == "test"
class TestMatchesClass(object):
match1 = Match(0, 2, value="te", name="start")
match2 = Match(2, 3, value="s", tags="tag1")
match3 = Match(3, 4, value="t", tags=["tag1", "tag2"])
match4 = Match(2, 4, value="st", name="end")
def test_tag(self):
matches = Matches()
matches.append(self.match1)
matches.append(self.match2)
matches.append(self.match3)
matches.append(self.match4)
assert "start" in matches.names
assert "end" in matches.names
assert "tag1" in matches.tags
assert "tag2" in matches.tags
tag1 = matches.tagged("tag1")
assert len(tag1) == 2
assert tag1[0] == self.match2
assert tag1[1] == self.match3
tag2 = matches.tagged("tag2")
assert len(tag2) == 1
assert tag2[0] == self.match3
start = matches.named("start")
assert len(start) == 1
assert start[0] == self.match1
end = matches.named("end")
assert len(end) == 1
assert end[0] == self.match4
def test_base(self):
matches = Matches()
matches.append(self.match1)
assert len(matches) == 1
assert repr(matches) == repr([self.match1])
assert list(matches.starting(0)) == [self.match1]
assert list(matches.ending(2)) == [self.match1]
matches.append(self.match2)
matches.append(self.match3)
matches.append(self.match4)
assert len(matches) == 4
assert list(matches.starting(2)) == [self.match2, self.match4]
assert list(matches.starting(3)) == [self.match3]
assert list(matches.ending(3)) == [self.match2]
assert list(matches.ending(4)) == [self.match3, self.match4]
assert list(matches.range()) == [self.match1, self.match2, self.match4, self.match3]
assert list(matches.range(0)) == [self.match1, self.match2, self.match4, self.match3]
assert list(matches.range(0, 3)) == [self.match1, self.match2, self.match4]
assert list(matches.range(2, 3)) == [self.match2, self.match4]
assert list(matches.range(3, 4)) == [self.match4, self.match3]
matches.remove(self.match1)
assert len(matches) == 3
assert len(matches.starting(0)) == 0
assert len(matches.ending(2)) == 0
matches.clear()
assert len(matches) == 0
assert len(matches.starting(0)) == 0
assert len(matches.starting(2)) == 0
assert len(matches.starting(3)) == 0
assert len(matches.ending(2)) == 0
assert len(matches.ending(3)) == 0
assert len(matches.ending(4)) == 0
def test_get_slices(self):
matches = Matches()
matches.append(self.match1)
matches.append(self.match2)
matches.append(self.match3)
matches.append(self.match4)
slice_matches = matches[1:3]
assert isinstance(slice_matches, Matches)
assert len(slice_matches) == 2
assert slice_matches[0] == self.match2
assert slice_matches[1] == self.match3
def test_remove_slices(self):
matches = Matches()
matches.append(self.match1)
matches.append(self.match2)
matches.append(self.match3)
matches.append(self.match4)
del matches[1:3]
assert len(matches) == 2
assert matches[0] == self.match1
assert matches[1] == self.match4
def test_set_slices(self):
matches = Matches()
matches.append(self.match1)
matches.append(self.match2)
matches.append(self.match3)
matches.append(self.match4)
matches[1:3] = self.match1, self.match4
assert len(matches) == 4
assert matches[0] == self.match1
assert matches[1] == self.match1
assert matches[2] == self.match4
assert matches[3] == self.match4
def test_set_index(self):
matches = Matches()
matches.append(self.match1)
matches.append(self.match2)
matches.append(self.match3)
matches[1] = self.match4
assert len(matches) == 3
assert matches[0] == self.match1
assert matches[1] == self.match4
assert matches[2] == self.match3
def test_constructor(self):
matches = Matches([self.match1, self.match2, self.match3, self.match4])
assert len(matches) == 4
assert list(matches.starting(0)) == [self.match1]
assert list(matches.ending(2)) == [self.match1]
assert list(matches.starting(2)) == [self.match2, self.match4]
assert list(matches.starting(3)) == [self.match3]
assert list(matches.ending(3)) == [self.match2]
assert list(matches.ending(4)) == [self.match3, self.match4]
def test_constructor_kwargs(self):
matches = Matches([self.match1, self.match2, self.match3, self.match4], input_string="test")
assert len(matches) == 4
assert matches.input_string == "test"
assert list(matches.starting(0)) == [self.match1]
assert list(matches.ending(2)) == [self.match1]
assert list(matches.starting(2)) == [self.match2, self.match4]
assert list(matches.starting(3)) == [self.match3]
assert list(matches.ending(3)) == [self.match2]
assert list(matches.ending(4)) == [self.match3, self.match4]
def test_crop(self):
    """Cropping a match with spans and/or matches returns the remaining pieces."""
    text = "abcdefghijklmnopqrstuvwxyz"
    match1 = Match(1, 10, input_string=text)
    match2 = Match(0, 2, input_string=text)
    match3 = Match(8, 15, input_string=text)

    # A Match and a raw span can be mixed in the same crop call.
    pieces = match1.crop([match2, match3.span])
    assert len(pieces) == 1
    only = pieces[0]
    assert only.span == (2, 8)
    assert only.value == "cdefgh"

    # Cropping with the exact same span leaves nothing.
    pieces = match1.crop((1, 10))
    assert len(pieces) == 0

    # Cropping at the beginning.
    pieces = match1.crop((1, 3))
    assert len(pieces) == 1
    assert pieces[0].span == (3, 10)

    # Cropping at the end.
    pieces = match1.crop((7, 10))
    assert len(pieces) == 1
    assert pieces[0].span == (1, 7)

    # Cropping with a span covering the whole match leaves nothing.
    pieces = match1.crop((0, 12))
    assert len(pieces) == 0

    # Cropping in the middle splits the match in two.
    pieces = match1.crop((4, 6))
    assert len(pieces) == 2
    assert pieces[0].span == (1, 4)
    assert pieces[1].span == (6, 10)

    # Two inner spans split the match in three.
    pieces = match1.crop([(3, 5), (7, 9)])
    assert len(pieces) == 3
    assert pieces[0].span == (1, 3)
    assert pieces[1].span == (5, 7)
    assert pieces[2].span == (9, 10)
def test_split(self):
    """Splitting a match on separator characters yields one match per word."""
    text = "123 +word1 -  word2  + word3  456"
    # Leave out the three leading and three trailing digit characters.
    match = Match(3, len(text) - 3, input_string=text)

    parts = match.split(" -+")

    assert len(parts) == 3
    values = [part.value for part in parts]
    assert values == ["word1", "word2", "word3"]
class TestMaches(object):
    """
    Tests for Matches selection helpers, Match raw/value handling, to_dict, chains and holes
    """

    def test_names(self):
        """``names`` exposes the name of every pattern that produced a match."""
        input_string = "One Two Three"
        matches = Matches()
        for word, str_name, re_name in (("One", "1-str", "1-re"),
                                        ("Two", "2-str", "2-re"),
                                        ("Three", "3-str", "3-re")):
            matches.extend(StringPattern(word, name=str_name, tags=[word, "str"]).matches(input_string))
            matches.extend(RePattern(word, name=re_name, tags=[word, "re"]).matches(input_string))

        expected = set(["1-str", "1-re", "2-str", "2-re", "3-str", "3-re"])
        assert set(matches.names) == expected

    def test_filters(self):
        """starting/ending/previous/next/named honour the predicate and index arguments."""
        input_string = "One Two Three"
        matches = Matches()
        for word, str_name, re_name in (("One", "1-str", "1-re"),
                                        ("Two", "2-str", "2-re"),
                                        ("Three", "3-str", "3-re")):
            matches.extend(StringPattern(word, name=str_name, tags=[word, "str"]).matches(input_string))
            matches.extend(RePattern(word, name=re_name, tags=[word, "re"]).matches(input_string))

        def has_str_tag(m):
            return "str" in m.tags

        def has_re_tag(m):
            return "re" in m.tags

        selection = matches.starting(0)
        assert len(selection) == 2

        selection = matches.starting(0, has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.ending(7, predicate=has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "2-str"

        selection = matches.previous(matches.named("2-str")[0])
        assert len(selection) == 2
        assert selection[0].pattern.name == "1-str"
        assert selection[1].pattern.name == "1-re"

        selection = matches.previous(matches.named("2-str", 0), has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.next(matches.named("2-str", 0))
        assert len(selection) == 2
        assert selection[0].pattern.name == "3-str"
        assert selection[1].pattern.name == "3-re"

        # With an index, a single match is returned instead of a list.
        selection = matches.next(matches.named("2-str", 0), index=0, predicate=has_re_tag)
        assert selection is not None
        assert selection.pattern.name == "3-re"

        selection = matches.next(matches.named("2-str", index=0), has_re_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "3-re"

        selection = matches.named("2-str", has_re_tag)
        assert len(selection) == 0

        selection = matches.named("2-re", has_re_tag, 0)
        assert selection is not None
        assert selection.name == "2-re"  # pylint:disable=no-member

        selection = matches.named("2-re", has_re_tag)
        assert len(selection) == 1
        assert selection[0].name == "2-re"

        # An out-of-range index gives None.
        selection = matches.named("2-re", has_re_tag, index=1000)
        assert selection is None

    def test_raw(self):
        """``raw`` follows raw_start/raw_end and ``value`` is the formatted raw string."""
        input_string = "0123456789"

        def double(s):
            return s * 2

        match = Match(0, 10, input_string=input_string, formatter=double)

        assert match.value == match.raw * 2
        assert match.raw == input_string

        match.raw_end = 9
        match.raw_start = 1

        assert match.value == match.raw * 2
        assert match.raw == input_string[1:9]

        # Resetting the raw bounds restores the full raw string.
        match.raw_end = None
        match.raw_start = None

        assert match.value == match.raw * 2
        assert match.raw == input_string

    def test_formatter_chain(self):
        """A chain built with ``formatters`` produces the value 100 * 2 + 10 from "100"."""
        input_string = "100"
        chain = formatters(int, lambda s: s*2, lambda s: s+10)
        match = Match(0, 3, input_string=input_string, formatter=chain)

        assert match.raw == input_string
        assert match.value == 100 * 2 + 10

    def test_to_dict(self):
        """to_dict supports the first_value, enforce_list and details options."""
        input_string = "One Two Two Three"
        patterns = [
            StringPattern("One", name="1", tags=["One", "str"]),
            RePattern("One", name="1", tags=["One", "re"]),
            StringPattern("Two", name="2", tags=["Two", "str"]),
            RePattern("Two", name="2", tags=["Two", "re"]),
            RePattern("Two", name="2", tags=["Two", "reBis"]),
            StringPattern("Three", name="3", tags=["Three", "str"]),
            RePattern("Three", name="3bis", tags=["Three", "re"]),
            RePattern(r"(\w+)", name="words"),
        ]
        matches = Matches()
        for pattern in patterns:
            matches.extend(pattern.matches(input_string))

        kvalues = matches.to_dict(first_value=True)
        expected_first_values = {"1": "One",
                                 "2": "Two",
                                 "3": "Three",
                                 "3bis": "Three",
                                 "words": "One"}
        assert kvalues == expected_first_values
        assert kvalues.values_list["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(enforce_list=True)
        assert kvalues["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        twos = kvalues["2"]
        assert len(twos) == 2
        assert twos[0].value == "Two"
        assert twos[1].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        words = kvalues["words"]
        assert len(words) == 4
        assert words[0].value == "One"
        assert words[1].value == "Two"
        assert words[2].value == "Two"
        assert words[3].value == "Three"

        # Same checks again, this time going through values_list.
        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        twos = kvalues.values_list["2"]
        assert len(twos) == 2
        assert twos[0].value == "Two"
        assert twos[1].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        words = kvalues.values_list["words"]
        assert len(words) == 4
        assert words[0].value == "One"
        assert words[1].value == "Two"
        assert words[2].value == "Two"
        assert words[3].value == "Three"

    def test_chains(self):
        """Exercise chain_before/chain_after with positions, Match objects and predicates."""
        input_string = "wordX 10 20 30 40 wordA, wordB, wordC 70 80 wordX"

        matches = Matches(input_string=input_string)
        matches.extend(RePattern(r"\d+", name="digit").matches(input_string))
        matches.extend(RePattern("[a-zA-Z]+", name="word").matches(input_string))
        assert len(matches) == 11

        def is_word(match):
            return match.name == "word"

        def is_digit(match):
            return match.name == "digit"

        a_start = input_string.find('wordA')

        b_start = input_string.find('wordB')
        b_end = b_start + len('wordB')

        c_start = input_string.find('wordC')
        c_end = c_start + len('wordC')

        chain_before = matches.chain_before(b_start, " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(Match(b_start, b_start), " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(b_start, " ,", predicate=is_digit)
        assert len(chain_before) == 0

        chain_before = matches.chain_before(a_start, " ,", predicate=is_digit)
        assert len(chain_before) == 4
        assert [match.value for match in chain_before] == ["40", "30", "20", "10"]

        chain_after = matches.chain_after(b_end, " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(Match(b_end, b_end), " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(b_end, " ,", predicate=is_digit)
        assert len(chain_after) == 0

        chain_after = matches.chain_after(c_end, " ,", predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

        # An end bound far beyond the input gives the same result.
        chain_after = matches.chain_after(c_end, " ,", end=10000, predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

    def test_holes(self):
        """holes() returns the unmatched spans, optionally bounded, formatted or filtered."""
        # Seven blocks of ten identical digits: '1' * 10 up to '7' * 10.
        input_string = ''.join(digit * 10 for digit in '1234567')

        hole1 = Match(0, 10, input_string=input_string)
        hole2 = Match(20, 30, input_string=input_string)
        hole3 = Match(30, 40, input_string=input_string)
        hole4 = Match(60, 70, input_string=input_string)

        matches = Matches([hole1, hole2], input_string=input_string)
        matches.append(hole3)
        matches.append(hole4)

        holes = list(matches.holes())
        assert len(holes) == 2
        first_hole, second_hole = holes[0], holes[1]
        assert first_hole.span == (10, 20)
        assert first_hole.value == '2'*10
        assert second_hole.span == (40, 60)
        assert second_hole.value == '5' * 10 + '6' * 10

        holes = list(matches.holes(5, 15))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == '2'*5

        holes = list(matches.holes(5, 15, formatter=lambda value: "formatted"))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == "formatted"

        holes = list(matches.holes(5, 15, predicate=lambda hole: False))
        assert len(holes) == 0

    def test_holes_empty(self):
        """Without any match, the single hole has the whole input string as value."""
        input_string = "Test hole on empty matches"
        matches = Matches(input_string=input_string)

        holes = matches.holes()

        assert len(holes) == 1
        assert holes[0].value == input_string

    def test_holes_seps(self):
        """Passing ``seps`` to holes() splits the holes on those characters."""
        input_string = "Test hole - with many separators + included"
        match = StringPattern("many").matches(input_string)
        matches = Matches(match, input_string)

        holes = matches.holes()
        assert len(holes) == 2

        holes = matches.holes(seps="-+")
        assert len(holes) == 4
        values = [hole.value for hole in holes]
        assert values == ["Test hole ", " with ", " separators ", " included"]

View file

@ -0,0 +1,858 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition
import re
import pytest
from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_AVAILABLE
from ..match import Match
class TestStringPattern(object):
    """
    Tests for StringPattern matching
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single(self):
        """A single string is found once, with the expected span and value."""
        pattern = StringPattern("Celtic")

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (28, 34)
        assert found.value == "Celtic"

    def test_repr(self):
        """repr() shows the class name followed by the tuple of searched strings."""
        pattern = StringPattern("Celtic")

        assert repr(pattern) == "<StringPattern:('Celtic',)>"

    def test_ignore_case(self):
        """Case differences prevent a match unless ignore_case is enabled."""
        sensitive = StringPattern("celtic", ignore_case=False)
        matches = list(sensitive.matches(self.input_string))
        assert len(matches) == 0

        insensitive = StringPattern("celtic", ignore_case=True)
        matches = list(insensitive.matches(self.input_string))
        assert len(matches) == 1
        # The value keeps the case found in the input string.
        assert matches[0].value == "Celtic"

    def test_private_names(self):
        """A match whose name is listed in private_names is flagged private."""
        pattern = StringPattern("celtic", name="test", private_names=["test"], ignore_case=True)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        assert matches[0].private

    def test_ignore_names(self):
        """A match whose name is listed in ignore_names is not returned."""
        pattern = StringPattern("celtic", name="test", ignore_names=["test"], ignore_case=True)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 0

    def test_no_match(self):
        """A string absent from the input gives no match."""
        pattern = StringPattern("Python")

        matches = list(pattern.matches(self.input_string))

        assert not matches

    def test_multiple_patterns(self):
        """Several strings in one pattern each give a match."""
        pattern = StringPattern("playing", "annoyed", "Hebrew")

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 3
        expected = (((18, 25), "playing"),
                    ((46, 53), "annoyed"),
                    ((88, 94), "Hebrew"))
        for found, (span, value) in zip(matches, expected):
            assert isinstance(found, Match)
            assert found.pattern == pattern
            assert found.span == span
            assert found.value == value

    def test_start_end_kwargs(self):
        """No match is returned for "Abyssinian" with start=20 and end=40."""
        pattern = StringPattern("Abyssinian", start=20, end=40)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 0

    def test_matches_kwargs(self):
        """name and value keywords given to the pattern end up on the match."""
        pattern = StringPattern("Abyssinian", name="test", value="AB")

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert found.name == "test"
        assert found.value == "AB"

    def test_post_processor(self):
        """The post_processor receives the matches and the pattern, and its result replaces the matches."""
        def post_processor(matches, pattern):
            assert len(matches) == 1
            assert isinstance(pattern, StringPattern)

            # Returning an empty list discards the single match found.
            return []

        pattern = StringPattern("Abyssinian", name="test", value="AB", post_processor=post_processor)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 0
class TestRePattern(object):
    """
    Tests for RePattern matching
    """

    input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
                   "which were the Hebrew letter qoph."

    def test_single_compiled(self):
        """A pre-compiled regular expression object is accepted as pattern."""
        pattern = RePattern(re.compile("Celt.?c"))

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_string(self):
        """A regular expression given as a plain string is accepted as pattern."""
        pattern = RePattern("Celt.?c")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_kwargs(self):
        """A dict with "pattern" and "flags" keys is accepted as pattern."""
        pattern = RePattern({"pattern": "celt.?c", "flags": re.IGNORECASE})

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_vargs(self):
        """A (pattern, flags) tuple is accepted as pattern."""
        pattern = RePattern(("celt.?c", re.IGNORECASE))

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_no_match(self):
        """An expression that is not found in the input gives no match."""
        pattern = RePattern("abc.?def")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

    def test_shortcuts(self):
        """With the ("-", r"[\\W_]+") abbreviation, "Celtic-violin" matches "Celtic violin"."""
        pattern = RePattern("Celtic-violin", abbreviations=[("-", r"[\W_]+")])

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        pattern = RePattern({"pattern": "celtic-violin", "flags": re.IGNORECASE}, abbreviations=[("-", r"[\W_]+")])

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_multiple_patterns(self):
        """Several expressions in one pattern each give a match."""
        pattern = RePattern("pla.?ing", "ann.?yed", "Heb.?ew")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3

        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (18, 25)
        assert matches[0].value == "playing"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_unnamed_groups(self):
        """Unnamed groups become unnamed children of a single parent match."""
        pattern = RePattern(r"(Celt.?c)\s+(\w+)")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]

        assert isinstance(parent, Match)
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name is None
        assert group1.value == "Celtic"
        assert group1.parent == parent

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name is None
        assert group2.value == "violin"
        assert group2.parent == parent

    def test_named_groups(self):
        """Named groups become children carrying the group name."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]

        assert isinstance(parent, Match)
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert len(parent.children) == 2
        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"
        assert group1.parent == parent

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"
        assert group2.parent == parent

    def test_children(self):
        """With children=True the group matches are returned instead of the parent."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 2
        group1, group2 = matches

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_children_parent_private(self):
        """With children=True and private_parent=True the parent is returned too, flagged private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True, private_parent=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        # Check the parent's own type here; group1 is type-checked further down.
        assert isinstance(parent, Match)
        assert parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert not group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert not group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_parent_children_private(self):
        """With private_children=True the children are returned next to the parent, flagged private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        # Check the parent's own type here; group1 is type-checked further down.
        assert isinstance(parent, Match)
        assert not parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_every(self):
        """With every=True parent and children are all returned and none is private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", every=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        # Check the parent's own type here; group1 is type-checked further down.
        assert isinstance(parent, Match)
        assert not parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert not group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert not group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_private_names(self):
        """Only the child whose name is listed in private_names is flagged private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_names=["param2"], children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 2
        assert matches[0].name == "param1"
        assert not matches[0].private
        assert matches[1].name == "param2"
        assert matches[1].private

    def test_ignore_names(self):
        """The child whose name is listed in ignore_names is not returned."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", ignore_names=["param2"], children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "param1"

    def test_matches_kwargs(self):
        """name and value keywords reach the parent and its children; named groups keep their own name."""
        pattern = RePattern("He.rew", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        # Unnamed groups: children take both the name and the value of the pattern.
        pattern = RePattern("H(e.)(rew)", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        children = matches[0].children
        assert len(children) == 2
        assert children[0].name == "test"
        assert children[0].value == "HE"

        assert children[1].name == "test"
        assert children[1].value == "HE"

        # Named groups: children keep the group name but still take the value.
        pattern = RePattern("H(?P<first>e.)(?P<second>rew)", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        children = matches[0].children
        assert len(children) == 2
        assert children[0].name == "first"
        assert children[0].value == "HE"

        assert children[1].name == "second"
        assert children[1].value == "HE"
class TestFunctionalPattern(object):
    """
    Tests for FunctionalPattern matching
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single_vargs(self):
        """A function may return a (start, end, value, name) tuple."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                return start, start + len("fly"), "fly", "functional"
            return None

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (14, 17)
        assert found.name == "functional"
        assert found.value == "fly"

    def test_single_kwargs(self):
        """A function may return a dict with start, end and name keys."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                return {"start": start, "end": start + len("fly"), "name": "functional"}
            return None

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (14, 17)
        assert found.name == "functional"
        assert found.value == "fly"

    def test_multiple_objects(self):
        """A function may return a list mixing tuples and dicts, one item per match."""
        def func(input_string):
            found = []

            start = input_string.find("fly")
            if start > -1:
                found.append((start, start + len("fly"), {'name': "functional"}))

            start = input_string.find("annoyed")
            if start > -1:
                found.append((start, start + len("annoyed")))

            start = input_string.find("Hebrew")
            if start > -1:
                found.append({"start": start, "end": start + len("Hebrew")})

            return found

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 3
        fly, annoyed, hebrew = matches[0], matches[1], matches[2]

        assert isinstance(fly, Match)
        assert fly.pattern == pattern
        assert fly.span == (14, 17)
        assert fly.name == "functional"
        assert fly.value == "fly"

        assert isinstance(annoyed, Match)
        assert annoyed.pattern == pattern
        assert annoyed.span == (46, 53)
        assert annoyed.value == "annoyed"

        assert isinstance(hebrew, Match)
        assert hebrew.pattern == pattern
        assert hebrew.span == (88, 94)
        assert hebrew.value == "Hebrew"

    def test_multiple_generator(self):
        """A function may be a generator yielding one item per match."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                yield (start, start + len("fly"), {'name': "functional"})

            start = input_string.find("annoyed")
            if start > -1:
                yield (start, start + len("annoyed"))

            start = input_string.find("Hebrew")
            if start > -1:
                # Positional start combined with a dict carrying the end.
                yield (start, {"end": start + len("Hebrew")})

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 3
        fly, annoyed, hebrew = matches[0], matches[1], matches[2]

        assert isinstance(fly, Match)
        assert fly.pattern == pattern
        assert fly.span == (14, 17)
        assert fly.name == "functional"
        assert fly.value == "fly"

        assert isinstance(annoyed, Match)
        assert annoyed.pattern == pattern
        assert annoyed.span == (46, 53)
        assert annoyed.value == "annoyed"

        assert isinstance(hebrew, Match)
        assert hebrew.pattern == pattern
        assert hebrew.span == (88, 94)
        assert hebrew.value == "Hebrew"

    def test_no_match(self):
        """A function returning None gives no match."""
        pattern = FunctionalPattern(lambda x: None)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 0

    def test_multiple_patterns(self):
        """Several functions in one pattern each give a match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start > -1:
                return start, start + len("playing")
            return None

        def annoyed(input_string):
            start = input_string.find("annoyed")
            if start > -1:
                return start, start + len("annoyed")
            return None

        def hebrew(input_string):
            start = input_string.find("Hebrew")
            if start > -1:
                return start, start + len("Hebrew")
            return None

        pattern = FunctionalPattern(playing, annoyed, hebrew)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 3
        expected = (((18, 25), "playing"),
                    ((46, 53), "annoyed"),
                    ((88, 94), "Hebrew"))
        for found, (span, value) in zip(matches, expected):
            assert isinstance(found, Match)
            assert found.pattern == pattern
            assert found.span == span
            assert found.value == value

    def test_matches_kwargs(self):
        """name and value keywords given to the pattern end up on the match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start > -1:
                return start, start + len("playing")
            return None

        pattern = FunctionalPattern(playing, name="test", value="PLAY")

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert found.name == "test"
        assert found.value == "PLAY"
class TestValue(object):
    """
    Tests for value option
    """

    input_string = "This string contains 1849 a number"

    def test_str_value(self):
        """A string ``value`` replaces the matched text as match value."""
        pattern = StringPattern("1849", name="dummy", value="test")

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (21, 25)
        assert found.value == "test"

    def test_dict_child_value(self):
        """A dict ``value`` keyed by group name overrides only that child's value."""
        formatter = {'intParam': lambda x: int(x) * 2,
                     'strParam': lambda x: "really " + x}
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatter,
                            format_all=True,
                            value={'intParam': 'INT_PARAM_VALUE'})

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert len(parent.children) == 2
        str_child, int_child = parent.children

        # strParam has no forced value, so its formatter result is kept.
        assert isinstance(str_child, Match)
        assert str_child.pattern == pattern
        assert str_child.span == (12, 20)
        assert str_child.value == "really contains"

        assert isinstance(int_child, Match)
        assert int_child.pattern == pattern
        assert int_child.span == (21, 25)
        assert int_child.value == 'INT_PARAM_VALUE'

    def test_dict_default_value(self):
        """``__parent__`` and ``__children__`` keys give values for the parent and for children without their own key."""
        formatter = {'intParam': lambda x: int(x) * 2,
                     'strParam': lambda x: "really " + x}
        forced_values = {'__children__': 'CHILD', 'strParam': 'STR_VALUE', '__parent__': 'PARENT'}
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatter,
                            format_all=True,
                            value=forced_values)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert parent.value == "PARENT"
        assert len(parent.children) == 2
        str_child, int_child = parent.children

        assert isinstance(str_child, Match)
        assert str_child.pattern == pattern
        assert str_child.span == (12, 20)
        assert str_child.value == "STR_VALUE"

        # intParam has no dedicated key and takes the __children__ value.
        assert isinstance(int_child, Match)
        assert int_child.pattern == pattern
        assert int_child.span == (21, 25)
        assert int_child.value == "CHILD"
class TestFormatter(object):
    """
    Tests for formatter option
    """

    input_string = "This string contains 1849 a number"

    def test_single_string(self):
        """The formatter result becomes the value of a StringPattern match."""
        def half(x):
            return int(x) / 2

        pattern = StringPattern("1849", name="dummy", formatter=half)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (21, 25)
        assert found.value == 1849 / 2

    def test_single_re_no_group(self):
        """The formatter result becomes the value of a group-less RePattern match."""
        def double(x):
            return int(x) * 2

        pattern = RePattern(r"\d+", formatter=double)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (21, 25)
        assert found.value == 1849 * 2

    def test_single_re_named_groups(self):
        """A dict formatter keyed by group name formats each child with its own function."""
        formatter = {'intParam': lambda x: int(x) * 2,
                     'strParam': lambda x: "really " + x}
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatter, format_all=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert len(parent.children) == 2
        str_child, int_child = parent.children

        assert isinstance(str_child, Match)
        assert str_child.pattern == pattern
        assert str_child.span == (12, 20)
        assert str_child.value == "really contains"

        assert isinstance(int_child, Match)
        assert int_child.pattern == pattern
        assert int_child.span == (21, 25)
        assert int_child.value == 1849 * 2

    def test_repeated_captures_option(self):
        """The number of children for a repeated group depends on REGEX_AVAILABLE and repeated_captures."""
        expression = r"\[(\d+)\](?:-(\d+))*"
        text = "[02]-03-04-05-06"

        pattern = RePattern(expression)

        matches = list(pattern.matches(text))
        assert len(matches) == 1

        match = matches[0]
        if REGEX_AVAILABLE:
            # Every repetition of the second group gives a child.
            assert len(match.children) == 5
            assert [child.value for child in match.children] == ["02", "03", "04", "05", "06"]
        else:
            # Only the last repetition of the second group is kept.
            assert len(match.children) == 2
            assert [child.value for child in match.children] == ["02", "06"]

            with pytest.raises(NotImplementedError):
                RePattern(expression, repeated_captures=True)

        pattern = RePattern(expression, repeated_captures=False)

        matches = list(pattern.matches(text))
        assert len(matches) == 1

        match = matches[0]
        assert len(match.children) == 2
        assert [child.value for child in match.children] == ["02", "06"]

    def test_single_functional(self):
        """The formatter result becomes the value of a FunctionalPattern match."""
        def digit(input_string):
            start = input_string.find("1849")
            if start > -1:
                return start, start + len("1849")
            return None

        def triple(x):
            return int(x) * 3

        pattern = FunctionalPattern(digit, formatter=triple)

        matches = list(pattern.matches(self.input_string))

        assert len(matches) == 1
        found = matches[0]
        assert isinstance(found, Match)
        assert found.pattern == pattern
        assert found.span == (21, 25)
        assert found.value == 1849 * 3
class TestValidator(object):
    """
    Tests for validator option
    """

    input_string = "This string contains 1849 a number"

    @staticmethod
    def true_validator(match):
        """Validator returning True for the 1849 value found in the input string."""
        number = int(match.value)
        return number < 1850

    @staticmethod
    def false_validator(match):
        """Validator returning False for the 1849 value found in the input string."""
        number = int(match.value)
        return number >= 1850

    def test_single_string(self):
        """A StringPattern match is kept only when its validator returns True."""
        rejected = StringPattern("1849", name="dummy", validator=self.false_validator)
        matches = list(rejected.matches(self.input_string))
        assert len(matches) == 0

        accepted = StringPattern("1849", validator=self.true_validator)
        matches = list(accepted.matches(self.input_string))
        assert len(matches) == 1

    def test_single_re_no_group(self):
        """A group-less RePattern match is kept only when its validator returns True."""
        rejected = RePattern(r"\d+", validator=self.false_validator)
        matches = list(rejected.matches(self.input_string))
        assert len(matches) == 0

        accepted = RePattern(r"\d+", validator=self.true_validator)
        matches = list(accepted.matches(self.input_string))
        assert len(matches) == 1

    def test_single_re_named_groups(self):
        """A dict validator keyed by group name decides whether the match is kept."""
        expression = r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)"

        rejected = RePattern(expression,
                             validator={'intParam': self.false_validator}, validate_all=True)
        matches = list(rejected.matches(self.input_string))
        assert len(matches) == 0

        accepted = RePattern(expression,
                             validator={'intParam': self.true_validator}, validate_all=True)
        matches = list(accepted.matches(self.input_string))
        assert len(matches) == 1

    def test_validate_all(self):
        """Exercise the validator with children=True, with and without validate_all."""
        expression = r"contains (?P<intParam>\d+)"

        def below_100(match):
            return match.value < 100

        def above_100(match):
            return match.value > 100

        pattern = RePattern(expression, formatter=int, validator=below_100,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = RePattern(expression, formatter=int, validator=above_100,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        def invalid_func(match):
            if match.name == 'intParam':
                return True
            return match.value.startswith('abc')

        pattern = RePattern(expression, formatter=int, validator=invalid_func, validate_all=True,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        def func(match):
            if match.name == 'intParam':
                return True
            return match.value.startswith('contains')

        pattern = RePattern(expression, formatter=int, validator=func, validate_all=True,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_format_all(self):
        """formatter=int works with children=True and raises ValueError with format_all=True."""
        expression = r"contains (?P<intParam>\d+)"

        pattern = RePattern(expression, formatter=int,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        for found in matches:
            assert found.value is not None

        with pytest.raises(ValueError):
            pattern = RePattern(expression, formatter=int, format_all=True)
            matches = list(pattern.matches(self.input_string))
            for found in matches:
                assert found.value is not None

    def test_single_functional(self):
        """A FunctionalPattern match is kept only when its validator returns True."""
        def digit(input_string):
            start = input_string.find("1849")
            if start > -1:
                return start, start + len("1849")
            return None

        rejected = FunctionalPattern(digit, validator=self.false_validator)
        matches = list(rejected.matches(self.input_string))
        assert len(matches) == 0

        accepted = FunctionalPattern(digit, validator=self.true_validator)
        matches = list(accepted.matches(self.input_string))
        assert len(matches) == 1

View file

@ -0,0 +1,215 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
from ..pattern import StringPattern, RePattern
from ..processors import ConflictSolver
from ..rules import execute_rule
from ..match import Matches
def test_conflict_1():
    """Short matches lying inside a longer one ("kl", "ab", "ef") are expected to be removed."""
    text = "abcdefghijklmnopqrstuvwxyz"
    found = Matches(StringPattern("ijklmn", "kl", "abcdef", "ab", "ef", "yz").matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [item.value for item in found] == ["ijklmn", "abcdef", "yz"]
def test_conflict_2():
    """Of two partially overlapping matches, only the longer one is expected to remain."""
    text = "abcdefghijklmnopqrstuvwxyz"
    found = Matches(StringPattern("ijklmn", "jklmnopqrst").matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [item.value for item in found] == ["jklmnopqrst"]
def test_conflict_3():
    """When one match is one character longer than the match it contains, the longer one is kept."""
    text = "abcdefghijklmnopqrstuvwxyz"
    found = Matches(StringPattern("ijklmnopqrst", "jklmnopqrst").matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [item.value for item in found] == ["ijklmnopqrst"]
def test_conflict_4():
    """Adjacent, non-overlapping matches (short then long) are both expected to be kept."""
    text = "123456789"
    found = Matches(StringPattern("123", "456789").matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [item.value for item in found] == ["123", "456789"]
def test_conflict_5():
    """Adjacent, non-overlapping matches (long then short) are both expected to be kept."""
    text = "123456789"
    found = Matches(StringPattern("123456", "789").matches(text))

    execute_rule(ConflictSolver(), found, None)

    assert [item.value for item in found] == ["123456", "789"]
def test_prefer_longer_parent():
    """Children of the 'prefer' pattern (int values 1 and 2) are expected to win over the 'skip' child."""
    text = "xxx.1x02.xxx"
    preferred = RePattern("([0-9]+)x([0-9]+)", name='prefer', children=True, formatter=int)
    skipped = RePattern("x([0-9]+)", name='skip', children=True)

    found = Matches(preferred.matches(text))
    found.extend(skipped.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 2
    assert found[0].value == 1
    assert found[1].value == 2
def test_conflict_solver_1():
    """A solver returning '__default__' on the longer pattern leaves the longer match in place."""
    text = "123456789"
    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "2345678"
def test_conflict_solver_2():
    """With '__default__' on the longer pattern and `conflicting` returned by the shorter, the shorter stays."""
    text = "123456789"
    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
def test_conflict_solver_3():
    """A solver on the longer pattern that returns `match` is expected to leave only the shorter match."""
    text = "123456789"
    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: match)
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
def test_conflict_solver_4():
    """A solver on the shorter pattern that returns `conflicting` is expected to leave only the shorter match."""
    text = "123456789"
    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
def test_conflict_solver_5():
    """A solver on the longer pattern that returns `conflicting` is expected to leave only the longer match."""
    text = "123456789"
    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: conflicting)
    shorter = StringPattern("34567")

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "2345678"
def test_conflict_solver_6():
    """Same scenario as test_conflict_solver_4: the shorter pattern's solver returns `conflicting`."""
    text = "123456789"
    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    found = Matches(longer.matches(text))
    found.extend(shorter.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "34567"
def test_conflict_solver_7():
    """The longer match is expected to win even when the shorter one was added to Matches first."""
    text = "102"
    longer = StringPattern("102")
    shorter = StringPattern("02")

    # Insertion order is deliberately shorter-first here.
    found = Matches(shorter.matches(text))
    found.extend(longer.matches(text))
    execute_rule(ConflictSolver(), found, None)

    assert len(found) == 1
    assert found[0].value == "102"
def test_unresolved():
    """Conflicts that are expected to stay unresolved: both matches remain in each scenario."""
    text = "123456789"

    def surviving(first, second):
        # Collect matches of both patterns (in that order), run ConflictSolver, count what is left.
        found = Matches(first.matches(text))
        found.extend(second.matches(text))
        execute_rule(ConflictSolver(), found, None)
        return len(found)

    # Two partially overlapping matches of equal length.
    assert surviving(StringPattern("23456"), StringPattern("34567")) == 2

    # The longer pattern's solver returns None.
    assert surviving(StringPattern("34567"),
                     StringPattern("2345678", conflict_solver=lambda match, conflicting: None)) == 2

    # The shorter pattern's solver returns None.
    assert surviving(StringPattern("34567", conflict_solver=lambda match, conflicting: None),
                     StringPattern("2345678")) == 2

View file

@ -0,0 +1,419 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
from ..rebulk import Rebulk
from ..rules import Rule
from . import rebulk_rules_module as rm
def test_rebulk_simple():
    """One string, one regex and one functional pattern each contribute a match."""
    def func(input_string):
        start = input_string.find("over")
        if start > -1:
            return start, start + len("over")

    rebulk = Rebulk()
    rebulk.string("quick")
    rebulk.regex("f.x")
    rebulk.functional(func)

    found = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(found) == 3
    assert found[0].value == "quick"
    assert found[1].value == "fox"
    assert found[2].value == "over"
def test_rebulk_composition():
    """Nested Rebulk objects contribute their matches; the nested one built with disabled=... adds none."""
    regex_part = Rebulk().regex("f.x")
    disabled_part = Rebulk(disabled=lambda context: True).functional(lambda string: None)

    rebulk = Rebulk()
    rebulk.string("quick")
    rebulk.rebulk(regex_part)
    rebulk.rebulk(disabled_part)

    found = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(found) == 2
    assert found[0].value == "quick"
    assert found[1].value == "fox"
def test_rebulk_context():
    """The context dict drives both `disabled` callbacks and a functional pattern taking `context`."""
    def func(input_string, context):
        word = context.get('word', 'over')
        start = input_string.find(word)
        if start > -1:
            return start, start + len(word)

    options = {'nostring': True, 'word': 'lazy'}

    rebulk = Rebulk()
    rebulk.string("quick", disabled=lambda context: context.get('nostring', False))
    rebulk.regex("f.x", disabled=lambda context: context.get('noregex', False))
    rebulk.functional(func)

    found = rebulk.matches("The quick brown fox jumps over the lazy dog", options)

    # 'nostring' disables the string pattern; 'word' redirects the functional pattern to "lazy".
    assert len(found) == 2
    assert found[0].value == "fox"
    assert found[1].value == "lazy"
def test_rebulk_prefer_longer():
    """With default rules, "own" is expected to be dropped in favour of the longer "brown"."""
    text = "The quick brown fox jumps over the lazy dog"

    found = Rebulk().string("quick").string("own").regex("br.{2}n").matches(text)

    assert len(found) == 2
    assert found[0].value == "quick"
    assert found[1].value == "brown"
def test_rebulk_defaults():
    """Per-kind and global defaults supply names and tags; tags given on a pattern are appended after them."""
    text = "The quick brown fox jumps over the lazy dog"

    def func(input_string):
        start = input_string.find("fox")
        if start > -1:
            return start, start + len("fox")

    # Per-kind defaults only.
    found = (Rebulk()
             .string_defaults(name="string", tags=["a", "b"])
             .regex_defaults(name="regex")
             .functional_defaults(name="functional")
             .string("quick", tags=["c"])
             .functional(func)
             .regex("br.{2}n")
             .matches(text))

    assert found[0].name == "string"
    assert found[0].tags == ["a", "b", "c"]
    assert found[1].name == "functional"
    assert found[2].name == "regex"

    # Global defaults combined with per-kind defaults.
    found = (Rebulk()
             .defaults(name="default", tags=["0"])
             .string_defaults(name="string", tags=["a", "b"])
             .functional_defaults(name="functional", tags=["1"])
             .string("quick", tags=["c"])
             .functional(func)
             .regex("br.{2}n")
             .matches(text))

    assert found[0].name == "string"
    assert found[0].tags == ["0", "a", "b", "c"]
    assert found[1].name == "functional"
    assert found[1].tags == ["0", "1"]
    assert found[2].name == "default"
    assert found[2].tags == ["0"]
def test_rebulk_rebulk():
    """Patterns of a child Rebulk take part in the parent's matching."""
    text = "The quick brown fox jumps over the lazy dog"
    parent = Rebulk().string("quick")
    nested = Rebulk().string("own").regex("br.{2}n")

    found = parent.rebulk(nested).matches(text)

    assert len(found) == 2
    assert found[0].value == "quick"
    assert found[1].value == "brown"
def test_rebulk_no_default():
    """With default_rules=False the overlapping "own" and "brown" matches are both kept."""
    text = "The quick brown fox jumps over the lazy dog"

    found = Rebulk(default_rules=False).string("quick").string("own").regex("br.{2}n").matches(text)

    assert len(found) == 3
    assert found[0].value == "quick"
    assert found[1].value == "own"
    assert found[2].value == "brown"
def test_rebulk_empty_match():
    """The regex's only group captures nothing here, and no match is expected from it."""
    text = "The quick brown fox jumps over the lazy dog"
    rebulk = Rebulk(default_rules=False).string("quick").string("own").regex("br(.*?)own", children=True)

    found = rebulk.matches(text)

    assert len(found) == 2
    assert found[0].value == "quick"
    assert found[1].value == "own"
def test_rebulk_tags_names():
    """Names and tags can come from pattern options or from a functional pattern's return value."""
    def func(input_string):
        # Tuple form: (start, end, extra keyword dict).
        start = input_string.find("over")
        if start > -1:
            return start, start + len("over"), {'tags': ['custom']}

    def func2(input_string):
        # Dict form carrying 'start', 'end' and 'tags'.
        start = input_string.find("lazy")
        if start > -1:
            return {'start': start, 'end': start + len("lazy"), 'tags': ['custom']}

    rebulk = Rebulk()
    rebulk.string("quick", name="str", tags=["first", "other"])
    rebulk.regex("f.x", tags="other")
    rebulk.functional(func, name="fn")
    rebulk.functional(func2, name="fn")

    found = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(found) == 4

    assert len(found.named("str")) == 1
    assert len(found.named("fn")) == 2
    assert len(found.named("false")) == 0

    assert len(found.tagged("false")) == 0
    assert len(found.tagged("first")) == 1
    assert len(found.tagged("other")) == 2
    assert len(found.tagged("custom")) == 2
def test_rebulk_rules_1():
    """With rm.RemoveAllButLastYear registered, only the last year match ("1982") is expected."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.rules(rm.RemoveAllButLastYear)

    found = rebulk.matches("1984 keep only last 1968 entry 1982 case")

    assert len(found) == 1
    assert found[0].value == "1982"
def test_rebulk_rules_2():
    """With rm.PrefixedSuffixedYear, the years next to the private 'keep'/'year' matches are expected."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYear)

    found = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    # The private prefix/suffix matches are not part of the result; only two years remain.
    assert len(found) == 2
    assert found[0].value == "1984"
    assert found[1].value == "1968"
def test_rebulk_rules_3():
    """Same expectations as test_rebulk_rules_2, using rm.PrefixedSuffixedYearNoLambda."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYearNoLambda)

    found = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    assert len(found) == 2
    assert found[0].value == "1984"
    assert found[1].value == "1968"
def test_rebulk_rules_4():
    """A locally defined rule removes the 'grabbed' match whenever another match precedes it."""
    class FirstOnlyRule(Rule):
        def when(self, matches, context):
            candidate = matches.named("grabbed", 0)
            if candidate and matches.previous(candidate):
                return candidate

        def then(self, matches, when_response, context):
            matches.remove(when_response)

    rebulk = Rebulk()
    rebulk.regex("This match (.*?)grabbed", name="grabbed")
    rebulk.regex("if it's (.*?)first match", private=True)
    rebulk.rules(FirstOnlyRule)

    found = rebulk.matches("This match is grabbed only if it's the first match")
    assert len(found) == 1
    assert found[0].value == "This match is grabbed"

    found = rebulk.matches("if it's NOT the first match, This match is NOT grabbed")
    assert len(found) == 0
class TestMarkers(object):
    """Tests for marker patterns (marker=True) queried through matches.markers inside rules."""

    def test_one_marker(self):
        """The word is kept only when a "mark1" (parenthesis) marker is found for it."""
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_match(word_match, lambda marker: marker.name == "mark1", 0)
                if not marker:
                    return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk().regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex(r'\[.*?\]', marker=True, name="mark2")
                  .string("word", name="word")
                  .rules(MarkerRule))

        found = rebulk.matches("grab (word) only if it's in parenthesis")
        assert len(found) == 1
        assert found[0].value == "word"

        found = rebulk.matches("don't grab [word] if it's in braket")
        assert len(found) == 0

        found = rebulk.matches("don't grab word at all")
        assert len(found) == 0

    def test_multiple_marker(self):
        """The word is kept only when at least two "mark1"/"mark2" markers are found for it."""
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_match(word_match,
                                                  lambda marker: marker.name in ("mark1", "mark2"))
                if len(marker) < 2:
                    return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk().regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex(r'\[.*?\]', marker=True, name="mark2")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        found = rebulk.matches("[grab (word) only] if it's in parenthesis and brakets")
        assert len(found) == 1
        assert found[0].value == "word"

        found = rebulk.matches("[don't grab](word)[if brakets are outside]")
        assert len(found) == 0

        found = rebulk.matches("(grab w[or)d even] if it's partially in parenthesis and brakets")
        assert len(found) == 1
        assert found[0].value == "w[or)d"

    def test_at_index_marker(self):
        """The word is kept only when a "mark1" marker is found at the word's start index."""
        class MarkerRule(Rule):
            def when(self, matches, context):
                word_match = matches.named("word", 0)
                marker = matches.markers.at_index(word_match.start,
                                                  lambda marker: marker.name == "mark1", 0)
                if not marker:
                    return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = (Rebulk().regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        found = rebulk.matches("gr(ab wo)rd only if starting of match is inside parenthesis")
        assert len(found) == 1
        assert found[0].value == "wo)rd"

        found = rebulk.matches("don't grab wo(rd if starting of match is not inside parenthesis")
        assert len(found) == 0

    def test_remove_marker(self):
        """A rule may remove a marker; the word match stays and matches.markers ends up empty."""
        class MarkerRule(Rule):
            def when(self, matches, context):
                marker = matches.markers.named("mark1", 0)
                if marker:
                    return marker

            def then(self, matches, when_response, context):
                matches.markers.remove(when_response)

        rebulk = (Rebulk().regex(r'\(.*?\)', marker=True, name="mark1")
                  .regex("w.*?d", name="word")
                  .rules(MarkerRule))

        found = rebulk.matches("grab word event (if it's not) inside parenthesis")
        assert len(found) == 1
        assert found[0].value == "word"

        assert not found.markers
class TestUnicode(object):
    """Checks that string, regex and functional patterns work on non-ASCII unicode input."""

    def test_rebulk_simple(self):
        """Each of the first three characters of the input is found by a different kind of pattern."""
        input_string = u"敏捷的棕色狐狸跳過懶狗"

        rebulk = Rebulk()

        # The searched literals must be non-empty substrings of input_string: with empty
        # literals (as this test read before) none of the patterns looks for the Chinese text.
        rebulk.string(u"敏")
        rebulk.regex(u"捷")

        def func(input_string):
            i = input_string.find(u"的")
            if i > -1:
                return i, i + len(u"的")

        rebulk.functional(func)

        matches = rebulk.matches(input_string)
        assert len(matches) == 3

        # Matches are expected in input order: 1st, 2nd and 3rd character.
        assert matches[0].value == u"敏"
        assert matches[1].value == u"捷"
        assert matches[2].value == u"的"
class TestImmutable(object):
    """Removing items from the results of Matches queries must leave the Matches object itself intact."""

    def test_starting(self):
        """Emptying every matches.starting(i) result does not shrink matches."""
        input_string = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().string("quick").string("over").string("fox").matches(input_string)

        for index in range(len(input_string)):
            starting = matches.starting(index)
            # Iterate over a copy because the result is mutated inside the loop.
            for item in list(starting):
                starting.remove(item)

        assert len(matches) == 3

    def test_ending(self):
        """Emptying every matches.ending(i) result does not shrink matches."""
        input_string = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().string("quick").string("over").string("fox").matches(input_string)

        for index in range(len(input_string)):
            ending = matches.ending(index)
            # Iterate over a copy because the result is mutated inside the loop.
            for item in list(ending):
                ending.remove(item)

        assert len(matches) == 3

    def test_named(self):
        """Emptying the matches.named('test') result does not shrink matches."""
        input_string = "The quick brown fox jumps over the lazy dog"
        matches = Rebulk().defaults(name='test').string("quick").string("over").string("fox").matches(input_string)

        named = matches.named('test')
        for item in list(named):
            named.remove(item)

        assert len(named) == 0
        assert len(matches) == 3

View file

@ -0,0 +1,197 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, no-member, len-as-condition
import pytest
from rebulk.test.default_rules_module import RuleRemove0, RuleAppend0, RuleRename0, RuleAppend1, RuleRemove1, \
RuleRename1, RuleAppend2, RuleRename2, RuleAppend3, RuleRename3, RuleAppendTags0, RuleRemoveTags0, \
RuleAppendTags1, RuleRemoveTags1
from ..rules import Rules
from ..match import Matches, Match
from .rules_module import Rule1, Rule2, Rule3, Rule0, Rule1Disabled
from . import rules_module as rm
def test_rule_priority():
    """Run two rule pairs (given as a mix of classes and instances) and check the resulting matches."""
    first_run = Matches([Match(1, 2)])
    Rules(Rule1, Rule2()).execute_all_rules(first_run, {})
    assert len(first_run) == 0

    second_run = Matches([Match(1, 2)])
    Rules(Rule1(), Rule0).execute_all_rules(second_run, {})
    assert len(second_run) == 1
    assert second_run[0] == Match(3, 4)
def test_rules_duplicates():
    """Executing a Rules set built from the same rule twice is expected to raise ValueError."""
    found = Matches([Match(1, 2)])
    duplicated = Rules(Rule1, Rule1)

    with pytest.raises(ValueError):
        duplicated.execute_all_rules(found, {})
def test_rule_disabled():
    """With Rule1Disabled in place of Rule1, the initial match survives and Match(3, 4) is added."""
    found = Matches([Match(1, 2)])

    Rules(Rule1Disabled(), Rule2()).execute_all_rules(found, {})

    assert len(found) == 2
    assert found[0] == Match(1, 2)
    assert found[1] == Match(3, 4)
def test_rule_when():
    """Rule3 leaves matches untouched for context 'when': False and adds Match(3, 4) for 'when': True."""
    rules = Rules(Rule3())

    untouched = Matches([Match(1, 2)])
    rules.execute_all_rules(untouched, {'when': False})
    assert len(untouched) == 1
    assert untouched[0] == Match(1, 2)

    # The same Rules object is reused for the second run.
    extended = Matches([Match(1, 2)])
    rules.execute_all_rules(extended, {'when': True})
    assert len(extended) == 2
    assert extended[0] == Match(1, 2)
    assert extended[1] == Match(3, 4)
class TestDefaultRules(object):
    """Tests for the rule classes imported from default_rules_module (declared `consequence` variants)."""

    def test_remove(self):
        """Both RuleRemove variants are expected to empty the matches."""
        for rule in (RuleRemove0, RuleRemove1):
            matches = Matches([Match(1, 2)])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches) == 0

    def test_append(self):
        """Every RuleAppend variant adds one match; variants 2 and 3 add it under the name 'renamed'."""
        for rule in (RuleAppend0, RuleAppend1):
            matches = Matches([Match(1, 2)])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches) == 2

        for rule in (RuleAppend2, RuleAppend3):
            matches = Matches([Match(1, 2)])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches) == 2
            assert len(matches.named('renamed')) == 1

    def test_rename(self):
        """RuleRename variants rename a Match(5, 10) named 'original'; a Match(1, 2) is left alone."""
        # The match present here is Match(1, 2) and is expected to keep its name.
        matches = Matches([Match(1, 2, name='original')])
        Rules(RuleRename0).execute_all_rules(matches, {})
        assert len(matches.named('original')) == 1
        assert len(matches.named('renamed')) == 0

        for rule in (RuleRename1, RuleRename2, RuleRename3):
            matches = Matches([Match(5, 10, name='original')])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches.named('original')) == 0
            assert len(matches.named('renamed')) == 1

    def test_append_tags(self):
        """Both RuleAppendTags variants append 'new-tag' after the existing tag."""
        for rule in (RuleAppendTags0, RuleAppendTags1):
            matches = Matches([Match(1, 2, name='tags', tags=['other'])])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other', 'new-tag']

    def test_remove_tags(self):
        """Both RuleRemoveTags variants strip 'new-tag' and keep the other tag."""
        for rule in (RuleRemoveTags0, RuleRemoveTags1):
            matches = Matches([Match(1, 2, name='tags', tags=['other', 'new-tag'])])
            Rules(rule).execute_all_rules(matches, {})
            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other']
def test_rule_module():
    """Rules can be built from a whole module (rules_module, imported as rm)."""
    found = Matches([Match(1, 2)])

    Rules(rm).execute_all_rules(found, {})

    assert len(found) == 1
def test_rule_repr():
    """str() of a rule instance is checked against its expected '<...>' form."""
    expectations = (
        (Rule0, "<Rule0>"),
        (Rule1, "<Rule1>"),
        (Rule2, "<Rule2>"),
        (Rule1Disabled, "<Disabled Rule1>"),
    )
    for rule_class, text in expectations:
        assert str(rule_class()) == text

View file

@ -0,0 +1,111 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2014 True Blade Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Original:
# - https://bitbucket.org/ericvsmith/toposort (1.4)
# Modifications:
# - port to pytest
# pylint: skip-file
import pytest
from ..toposort import toposort, toposort_flatten, CyclicDependency
class TestCase(object):
    """Behavioural tests for ``toposort`` (pytest port of the original toposort test case)."""

    @staticmethod
    def _snapshot(data):
        """
        Return a copy of ``data`` in which every dependency set is copied as well.

        ``dict.copy()`` is shallow: the copy shares its set objects with the original, so
        comparing the two afterwards cannot reveal an in-place change to one of the sets.
        """
        return dict((key, set(deps)) for key, deps in data.items())

    def test_simple(self):
        results = list(toposort({2: set([11]), 9: set([11, 8]), 10: set([11, 3]), 11: set([7, 5]), 8: set([7, 3])}))
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]
        assert results == expected

        # make sure self dependencies are ignored
        results = list(toposort({2: set([2, 11]), 9: set([11, 8]), 10: set([10, 11, 3]), 11: set([7, 5]), 8: set([7, 3])}))
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]
        assert results == expected

        assert list(toposort({1: set()})) == [set([1])]
        assert list(toposort({1: set([1])})) == [set([1])]

    def test_no_dependencies(self):
        assert list(toposort({1: set([2]), 3: set([4]), 5: set([6])})) == [set([2, 4, 6]), set([1, 3, 5])]
        assert list(toposort({1: set(), 3: set(), 5: set()})) == [set([1, 3, 5])]

    def test_empty(self):
        assert list(toposort({})) == []

    def test_strings(self):
        results = list(toposort({'2': set(['11']), '9': set(['11', '8']), '10': set(['11', '3']), '11': set(['7', '5']), '8': set(['7', '3'])}))
        expected = [set(['3', '5', '7']), set(['8', '11']), set(['2', '9', '10'])]
        assert results == expected

    def test_objects(self):
        o2 = object()
        o3 = object()
        o5 = object()
        o7 = object()
        o8 = object()
        o9 = object()
        o10 = object()
        o11 = object()
        results = list(toposort({o2: set([o11]), o9: set([o11, o8]), o10: set([o11, o3]), o11: set([o7, o5]), o8: set([o7, o3, o8])}))
        expected = [set([o3, o5, o7]), set([o8, o11]), set([o2, o9, o10])]
        assert results == expected

    def test_cycle(self):
        # a simple, 2 element cycle
        with pytest.raises(CyclicDependency):
            list(toposort({1: set([2]), 2: set([1])}))

        # an indirect cycle
        with pytest.raises(CyclicDependency):
            list(toposort({1: set([2]), 2: set([3]), 3: set([1])}))

    def test_input_not_modified(self):
        data = {2: set([11]),
                9: set([11, 8]),
                10: set([11, 3]),
                11: set([7, 5]),
                8: set([7, 3, 8]),  # includes something self-referential
                }
        # Snapshot the sets too, so that dropping the self-reference from data[8] in place
        # would be noticed by the comparison below.
        # NOTE(review): a failure here means toposort mutates the caller's sets.
        orig = self._snapshot(data)
        # Exhaust the generator; only its effect on `data` matters here.
        list(toposort(data))
        assert data == orig

    def test_input_not_modified_when_cycle_error(self):
        data = {1: set([2]),
                2: set([1]),
                3: set([4]),
                }
        # Same two-level snapshot as in test_input_not_modified.
        orig = self._snapshot(data)
        with pytest.raises(CyclicDependency):
            list(toposort(data))
        assert data == orig
class TestCaseAll(object):
    """Tests for ``toposort_flatten``, checked against the grouped output of ``toposort``."""

    def test_sort_flatten(self):
        data = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3, 8]),  # includes something self-referential
        }
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]
        assert list(toposort(data)) == expected

        # now check the sorted results
        flattened = []
        for group in expected:
            flattened += sorted(group)
        assert toposort_flatten(data) == flattened

        # and the unsorted results. break the results up into groups to compare them
        actual = toposort_flatten(data, False)
        regrouped = [set(actual[0:3]), set(actual[3:5]), set(actual[5:8])]
        assert regrouped == expected

View file

@ -0,0 +1,64 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name,len-as-condition
from functools import partial
from rebulk.pattern import StringPattern
from ..validators import chars_before, chars_after, chars_surround, validators
# Characters bound as the first argument of the three validators below
# (space, underscore and dot) - presumably the accepted separator characters.
chars = ' _.'
# Validators with `chars` pre-bound via functools.partial, so each can be passed
# directly as a pattern's `validator=` option.
left = partial(chars_before, chars)
right = partial(chars_after, chars)
surrounding = partial(chars_surround, chars)
def test_left_chars():
    """`left` accepts "word" when preceded by one of the chars or located at the start of the input."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxx_wordxxx", 1),
        ("wordxxx", 1),
    )
    for text, expected in cases:
        found = list(StringPattern("word", validator=left).matches(text))
        assert len(found) == expected
def test_right_chars():
    """`right` accepts "word" when followed by one of the chars or located at the end of the input."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxxword.xxx", 1),
        ("xxxword", 1),
    )
    for text, expected in cases:
        found = list(StringPattern("word", validator=right).matches(text))
        assert len(found) == expected
def test_surrounding_chars():
    """`surrounding` accepts "word" only when both sides are one of the chars or an input boundary."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        ("word", 1),
    )
    for text, expected in cases:
        found = list(StringPattern("word", validator=surrounding).matches(text))
        assert len(found) == expected
def test_chain():
    """validators(left, right) yields the same results as `surrounding` on the same four inputs."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        ("word", 1),
    )
    for text, expected in cases:
        found = list(StringPattern("word", validator=validators(left, right)).matches(text))
        assert len(found) == expected