mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 10:36:52 -07:00
Move common libs to libs/common
This commit is contained in:
parent
8dbb1a2451
commit
1f4bd41bcc
1612 changed files with 962 additions and 10 deletions
3
libs/common/rebulk/test/__init__.py
Normal file
3
libs/common/rebulk/test/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring
|
79
libs/common/rebulk/test/default_rules_module.py
Normal file
79
libs/common/rebulk/test/default_rules_module.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from ..match import Match
|
||||
from ..rules import Rule, RemoveMatch, AppendMatch, RenameMatch, AppendTags, RemoveTags
|
||||
|
||||
|
||||
class RuleRemove0(Rule):
    """Rule fixture: ``RemoveMatch`` class as consequence; ``when`` returns the first match."""

    consequence = RemoveMatch

    def when(self, matches, context):
        first_match = matches[0]
        return first_match
|
||||
|
||||
|
||||
class RuleAppend0(Rule):
    """Rule fixture: ``AppendMatch()`` instance as consequence; ``when`` returns a single new match."""

    consequence = AppendMatch()

    def when(self, matches, context):
        created = Match(5, 10)
        return created
|
||||
|
||||
class RuleRename0(Rule):
    """Rule fixture: list holding a ``RenameMatch`` instance; ``when`` returns a list with one named match."""

    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        candidate = Match(5, 10, name="original")
        return [candidate]
|
||||
|
||||
class RuleRemove1(Rule):
    """Rule fixture: list holding a ``RemoveMatch`` instance; ``when`` returns the first match in a list."""

    consequence = [RemoveMatch()]

    def when(self, matches, context):
        first_match = matches[0]
        return [first_match]
|
||||
|
||||
class RuleAppend1(Rule):
    """Rule fixture: list holding the ``AppendMatch`` class; ``when`` returns a list with one new match."""

    consequence = [AppendMatch]

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename1(Rule):
    """Rule fixture: bare ``RenameMatch`` instance; ``when`` returns a list with one named match."""

    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        candidate = Match(5, 10, name="original")
        return [candidate]
|
||||
|
||||
class RuleAppend2(Rule):
    """Rule fixture: list holding ``AppendMatch('renamed')`` and a declared ``properties`` mapping."""

    consequence = [AppendMatch('renamed')]
    properties = {'renamed': [None]}

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename2(Rule):
    """Rule fixture: bare ``RenameMatch`` instance; ``when`` returns a single named match (not a list)."""

    consequence = RenameMatch('renamed')

    def when(self, matches, context):
        candidate = Match(5, 10, name="original")
        return candidate
|
||||
|
||||
class RuleAppend3(Rule):
    """Rule fixture: bare ``AppendMatch('renamed')`` instance and a declared ``properties`` mapping."""

    consequence = AppendMatch('renamed')
    properties = {'renamed': [None]}

    def when(self, matches, context):
        created = Match(5, 10)
        return [created]
|
||||
|
||||
class RuleRename3(Rule):
    """Rule fixture: list holding a ``RenameMatch`` instance; ``when`` returns a single named match."""

    consequence = [RenameMatch('renamed')]

    def when(self, matches, context):
        candidate = Match(5, 10, name="original")
        return candidate
|
||||
|
||||
class RuleAppendTags0(Rule):
    """Rule fixture: ``AppendTags(['new-tag'])``; ``when`` returns ``matches.named('tags', 0)``."""

    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags', 0)
        return selected
|
||||
|
||||
class RuleRemoveTags0(Rule):
    """Rule fixture: ``RemoveTags(['new-tag'])``; ``when`` returns ``matches.named('tags', 0)``."""

    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags', 0)
        return selected
|
||||
|
||||
class RuleAppendTags1(Rule):
    """Rule fixture: ``AppendTags(['new-tag'])``; ``when`` returns ``matches.named('tags')``."""

    consequence = AppendTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags')
        return selected
|
||||
|
||||
class RuleRemoveTags1(Rule):
    """Rule fixture: ``RemoveTags(['new-tag'])``; ``when`` returns ``matches.named('tags')``."""

    consequence = RemoveTags(['new-tag'])

    def when(self, matches, context):
        selected = matches.named('tags')
        return selected
|
38
libs/common/rebulk/test/rebulk_rules_module.py
Normal file
38
libs/common/rebulk/test/rebulk_rules_module.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from rebulk.rules import Rule, RemoveMatch, CustomRule
|
||||
|
||||
|
||||
class RemoveAllButLastYear(Rule):
    """Rule with a ``RemoveMatch`` consequence; ``when`` returns every 'year' match except the last one."""

    consequence = RemoveMatch

    def when(self, matches, context):
        # Slice off the final entry so only the earlier 'year' matches are returned.
        return matches.named('year')[:-1]
|
||||
|
||||
|
||||
class PrefixedSuffixedYear(CustomRule):
    """Custom rule removing 'year' matches that have neither a 'yearPrefix' before nor a 'yearSuffix' after."""

    def when(self, matches, context):
        def is_prefix(candidate):
            return candidate.name == 'yearPrefix'

        def is_suffix(candidate):
            return candidate.name == 'yearSuffix'

        # ``or`` keeps the original short-circuit: the suffix lookup only runs
        # when no prefix was found.
        return [
            year for year in matches.named('year')
            if not (matches.previous(year, is_prefix) or matches.next(year, is_suffix))
        ]

    def then(self, matches, when_response, context):
        for unwanted in when_response:
            matches.remove(unwanted)
|
||||
|
||||
|
||||
class PrefixedSuffixedYearNoLambda(Rule):
    """Same selection as the lambda-based variant, filtering the neighbours by name in plain Python."""

    consequence = RemoveMatch

    def when(self, matches, context):
        def has_named(neighbours, wanted):
            return any(item.name == wanted for item in neighbours)

        orphans = []
        for year in matches.named('year'):
            if has_named(matches.previous(year), 'yearPrefix'):
                continue
            if has_named(matches.next(year), 'yearSuffix'):
                continue
            orphans.append(year)
        return orphans
|
54
libs/common/rebulk/test/rules_module.py
Normal file
54
libs/common/rebulk/test/rules_module.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
from ..match import Match
|
||||
from ..rules import Rule
|
||||
|
||||
|
||||
class Rule3(Rule):
    """Rule whose ``when`` response is read from ``context['when']``; ``then`` appends ``Match(3, 4)``."""

    def when(self, matches, context):
        response = context.get('when')
        return response

    def then(self, matches, when_response, context):
        assert when_response in (True, False)
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule2(Rule):
    """Always-triggering rule declaring ``Rule3`` as dependency; ``then`` appends ``Match(3, 4)``."""

    dependency = Rule3

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule1(Rule):
    """Always-triggering rule declaring ``Rule2`` as dependency; ``then`` clears all matches."""

    dependency = Rule2

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        matches.clear()
|
||||
|
||||
|
||||
class Rule0(Rule):
    """Always-triggering rule declaring ``Rule1`` as dependency; ``then`` appends ``Match(3, 4)``."""

    dependency = Rule1

    def when(self, matches, context):
        return True

    def then(self, matches, when_response, context):
        assert when_response
        appended = Match(3, 4)
        matches.append(appended)
|
||||
|
||||
|
||||
class Rule1Disabled(Rule1):
    """Variant of ``Rule1`` with an explicit name whose ``enabled`` always returns ``False``."""

    name = "Disabled Rule1"

    def enabled(self, context):
        is_enabled = False
        return is_enabled
|
411
libs/common/rebulk/test/test_chain.py
Normal file
411
libs/common/rebulk/test/test_chain.py
Normal file
|
@ -0,0 +1,411 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
import re
|
||||
|
||||
from functools import partial
|
||||
|
||||
from ..validators import chars_surround
|
||||
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
|
||||
|
||||
|
||||
def test_chain_close():
    """Closing an empty chain is expected to give back the Rebulk and leave one effective pattern."""
    builder = Rebulk()
    closed = builder.chain().close()

    assert closed == builder
    assert len(builder.effective_patterns()) == 1
|
||||
|
||||
|
||||
def test_build_chain():
    """Build a five-part chain and check each part's pattern type and repeater bounds."""
    builder = Rebulk()

    def digit(input_string):
        position = input_string.find("1849")
        if position > -1:
            return position, position + len("1849")

    closed = (
        builder.chain()
        .functional(digit)
        .string("test").repeater(2)
        .string("x").repeater('{1,3}')
        .string("optional").repeater('?')
        .regex("f?x").repeater('+')
        .close()
    )

    assert closed == builder
    assert len(builder.effective_patterns()) == 1

    chain = builder.effective_patterns()[0]

    assert len(chain.parts) == 5

    # (pattern type, repeater_start, repeater_end) for the first four parts.
    bounded_parts = (
        (FunctionalPattern, 1, 1),
        (StringPattern, 2, 2),
        (StringPattern, 1, 3),
        (StringPattern, 0, 1),
    )
    for index, (pattern_type, start, end) in enumerate(bounded_parts):
        assert isinstance(chain.parts[index].pattern, pattern_type)
        assert chain.parts[index].repeater_start == start
        assert chain.parts[index].repeater_end == end

    # The '+' repeater is expected to have no upper bound.
    assert isinstance(chain.parts[4].pattern, RePattern)
    assert chain.parts[4].repeater_start == 1
    assert chain.parts[4].repeater_end is None
|
||||
|
||||
|
||||
def test_chain_defaults():
|
||||
rebulk = Rebulk()
|
||||
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
|
||||
|
||||
rebulk.chain()\
|
||||
.regex("(?P<test>test)") \
|
||||
.regex(" ").repeater("*") \
|
||||
.regex("(?P<testIgnore>testIgnore)")
|
||||
matches = rebulk.matches("test testIgnore")
|
||||
|
||||
assert len(matches) == 1
|
||||
assert matches[0].name == "test"
|
||||
|
||||
|
||||
def test_matches():
    """Run one chain against several inputs and check match counts and child values."""
    builder = Rebulk()

    def digit(input_string):
        position = input_string.find("1849")
        if position > -1:
            return position, position + len("1849")

    chain = (
        builder.chain()
        .functional(digit)
        .string("test").hidden().repeater(2)
        .string("x").hidden().repeater('{1,3}')
        .string("optional").hidden().repeater('?')
        .regex("f.?x", name='result').repeater('+')
        .close()
    )

    found = chain.matches("1849testtestxxfixfux_foxabc1849testtestxoptionalfoxabc")
    assert len(found) == 2

    first_children = found[0].children
    assert first_children[0].value == '1849'
    assert first_children[1].value == 'fix'
    assert first_children[2].value == 'fux'

    second_children = found[1].children
    assert second_children[0].value == '1849'
    assert second_children[1].value == 'fox'

    # Each of these inputs is expected to produce no match at all.
    rejected_inputs = (
        "_1850testtestxoptionalfoxabc",
        "_1849testtesttesttestxoptionalfoxabc",
        "_1849testtestxxxxoptionalfoxabc",
        "_1849testtestoptionalfoxabc",
        "_1849testtestxoptionalabc",
    )
    for text in rejected_inputs:
        found = chain.matches(text)
        assert len(found) == 0

    found = chain.matches("_1849testtestxoptionalfaxabc")
    assert len(found) == 1

    only_children = found[0].children
    assert only_children[0].value == '1849'
    assert only_children[1].value == 'fax'
|
||||
|
||||
|
||||
def test_matches_2():
    """Episode/version chain: five children with int-formatted values are expected."""
    builder = (
        Rebulk()
        .regex_defaults(flags=re.IGNORECASE)
        .chain(children=True, formatter={'episode': int})
        .defaults(formatter={'version': int})
        .regex(r'e(?P<episode>\d{1,4})')
        .regex(r'v(?P<version>\d+)').repeater('?')
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*')
        .close()
    )

    found = builder.matches("This is E14v2-15E16x17")
    assert len(found) == 5

    expected = (
        ('episode', 14),
        ('version', 2),
        ('episode', 15),
        ('episode', 16),
        ('episode', 17),
    )
    for index, (name, value) in enumerate(expected):
        assert found[index].name == name
        assert found[index].value == value
|
||||
|
||||
|
||||
def test_matches_3():
    """Three chained season/episode notations, each checked against one input string."""
    alt_dash = (r'@', r'[\W_]')  # abbreviation

    def conflict_solver(match, other):
        if match.name in ['season', 'episode'] and other.name in \
                ['screen_size', 'video_codec', 'audio_codec',
                 'audio_channels', 'container', 'date']:
            return match
        return '__default__'

    builder = Rebulk()

    (builder.chain(formatter={'season': int, 'episode': int},
                   tags=['SxxExx'],
                   abbreviations=[alt_dash],
                   private_names=['episodeSeparator', 'seasonSeparator'],
                   children=True,
                   private_parent=True,
                   conflict_solver=conflict_solver)
     .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)')
     .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*')
     .chain()
     .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)')
     .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*')
     .chain()
     .regex(r'S(?P<season>\d+)')
     .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*'))

    scenarios = (
        ("test-01x02-03", (('season', 1), ('episode', 2), ('episode', 3))),
        ("test-S01E02-03", (('season', 1), ('episode', 2), ('episode', 3))),
        ("test-S01-02-03-04", (('season', 1), ('season', 2), ('season', 3), ('season', 4))),
    )
    for text, expected in scenarios:
        found = builder.matches(text)
        assert len(found) == len(expected)
        for index, (name, value) in enumerate(expected):
            assert found[index].name == name
            assert found[index].value == value
|
||||
|
||||
|
||||
def test_matches_4():
    """Chain overriding the default validator with ``None``; three episode values are expected."""
    surround = partial(chars_surround, " ")

    builder = Rebulk()
    builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                     validator={'__parent__': surround}, children=True, private_parent=True)

    (builder.chain(formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'e(?P<episode>\d{1,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*'))

    found = builder.matches("Some Series E01E02E03")
    assert len(found) == 3

    for index, value in enumerate((1, 2, 3)):
        assert found[index].value == value
|
||||
|
||||
|
||||
def test_matches_5():
    """Bounded ``{2,3}`` repeater with a parent validator: check match counts for three inputs."""
    surround = partial(chars_surround, " ")

    builder = Rebulk()
    builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                     validator={'__parent__': surround}, children=True, private_parent=True)

    (builder.chain(formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'e(?P<episode>\d{1,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}'))

    expected_counts = (
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        # Parent can't be validated, so no results at all
        ("Some Series E01E02E03E04E05E06", 0),
    )
    for text, count in expected_counts:
        found = builder.matches(text)
        assert len(found) == count
|
||||
|
||||
|
||||
def test_matches_6():
    """Bounded ``{2,3}`` repeater without any validator: check match counts for three inputs."""
    builder = Rebulk()
    builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                     validator=None, children=True, private_parent=True)

    (builder.chain(formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'e(?P<episode>\d{1,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}'))

    expected_counts = (
        ("Some Series E01E02E03", 3),
        ("Some Series E01E02", 0),
        # No validator on parent, so it should give 4 episodes.
        ("Some Series E01E02E03E04E05E06", 4),
    )
    for text, count in expected_counts:
        found = builder.matches(text)
        assert len(found) == count
|
||||
|
||||
|
||||
def test_matches_7():
    """Season chain with surrounding-character validators: check match counts for five inputs."""
    surround = partial(chars_surround, ' .-/')
    builder = Rebulk()
    builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(children=True, private_parent=True)

    (builder.chain()
     .regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': surround})
     .regex(r'[ -](?P<season>\d+)', validator=surround).repeater('*'))

    # (input, values written onto the returned matches)
    scenarios = (
        ("Some S01", (1,)),
        ("Some S01-02", (1, 2)),
        ("programs4/Some S01-02", (1, 2)),
        ("programs4/SomeS01middle.S02-03.andS04here", (2, 3)),
        ("Some 02.and.S04-05.here", (4, 5)),
    )
    for text, values in scenarios:
        found = builder.matches(text)
        assert len(found) == len(values)
        # NOTE(review): these are assignments, not assertions, so the parsed
        # season values are never verified here. Presumably ``assert ... ==``
        # was intended; confirm the expected value type (no formatter is
        # configured in this test) before converting.
        for index, value in enumerate(values):
            found[index].value = value
|
||||
|
||||
|
||||
def test_chain_breaker():
    """A ``chain_breaker`` passed to ``chain()`` stops the chain at a season gap.

    The breaker returns True once the last two 'season' matches differ by more
    than 10, so "Some S01-02-03-50-51" is expected to yield seasons 1, 2 and 3.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(chain_breaker=chain_breaker). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # These were plain assignments (``matches[0].value = 1``), which verified
    # nothing. The int formatter on 'season' makes the values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
||||
|
||||
|
||||
def test_chain_breaker_defaults():
    """A ``chain_breaker`` supplied through ``Rebulk.defaults`` stops the chain at a season gap.

    The breaker returns True once the last two 'season' matches differ by more
    than 10, so "Some S01-02-03-50-51" is expected to yield seasons 1, 2 and 3.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(chain_breaker=chain_breaker, children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # These were plain assignments (``matches[0].value = 1``), which verified
    # nothing. The int formatter on 'season' makes the values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
||||
|
||||
|
||||
def test_chain_breaker_defaults2():
    """A ``chain_breaker`` supplied through ``Rebulk.chain_defaults`` stops the chain at a season gap.

    The breaker returns True once the last two 'season' matches differ by more
    than 10, so "Some S01-02-03-50-51" is expected to yield seasons 1, 2 and 3.
    """
    def chain_breaker(matches):
        seasons = matches.named('season')
        if len(seasons) > 1:
            if seasons[-1].value - seasons[-2].value > 10:
                return True
        return False

    seps_surround = partial(chars_surround, ' .-/')
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.chain_defaults(chain_breaker=chain_breaker)
    rebulk.defaults(children=True, private_parent=True, formatter={'season': int})

    rebulk.chain(). \
        regex(r'S(?P<season>\d+)', validate_all=True, validator={'__parent__': seps_surround}). \
        regex(r'[ -](?P<season>\d+)', validator=seps_surround).repeater('*')

    matches = rebulk.matches("Some S01-02-03-50-51")
    assert len(matches) == 3
    # These were plain assignments (``matches[0].value = 1``), which verified
    # nothing. The int formatter on 'season' makes the values integers.
    assert matches[0].value == 1
    assert matches[1].value == 2
    assert matches[2].value == 3
|
83
libs/common/rebulk/test/test_debug.py
Normal file
83
libs/common/rebulk/test/test_debug.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
|
||||
|
||||
from ..pattern import StringPattern
|
||||
from ..rebulk import Rebulk
|
||||
from ..match import Match
|
||||
from .. import debug
|
||||
from .default_rules_module import RuleRemove0
|
||||
|
||||
|
||||
class TestDebug(object):
    # The tests below assert the source line numbers of the fixture statements
    # in this class body; do not add or remove lines above ``debug.DEBUG = False``.
    #request.addfinalizer(disable_debug)
    # (Only formerly blank lines carry comments here, to keep that layout.)

    # Debug mode is switched on while the fixtures below are created.
    debug.DEBUG = True
    pattern = StringPattern(1, 3, value="es")

    match = Match(1, 3, value="es")
    rule = RuleRemove0()

    input_string = "This is a debug test"
    rebulk = Rebulk().string("debug") \
        .string("is")

    matches = rebulk.matches(input_string)
    debug.DEBUG = False

    @classmethod
    def setup_class(cls):
        # Re-enable debug mode for the duration of this class's tests.
        debug.DEBUG = True

    @classmethod
    def teardown_class(cls):
        # Switch debug mode back off once the class's tests are done.
        debug.DEBUG = False

    def test_pattern(self):
        # ``defined_at`` of the pattern fixture must point at its line in this module.
        assert self.pattern.defined_at.lineno == 20
        assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
        assert self.pattern.defined_at.filename.endswith('test_debug.py')

        assert str(self.pattern.defined_at) == 'test_debug.py#L20'
        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'

    def test_match(self):
        # ``defined_at`` of the match fixture must point at its line in this module.
        assert self.match.defined_at.lineno == 22
        assert self.match.defined_at.name == 'rebulk.test.test_debug'
        assert self.match.defined_at.filename.endswith('test_debug.py')

        assert str(self.match.defined_at) == 'test_debug.py#L22'

    def test_rule(self):
        # ``defined_at`` of the rule fixture must point at its line in this module.
        assert self.rule.defined_at.lineno == 23
        assert self.rule.defined_at.name == 'rebulk.test.test_debug'
        assert self.rule.defined_at.filename.endswith('test_debug.py')

        assert str(self.rule.defined_at) == 'test_debug.py#L23'
        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'

    def test_rebulk(self):
        """
        This test fails on travis CI, can't find out why there's 1 line offset ...
        """
        # Two line numbers are accepted per pattern because of the offset noted above;
        # the ``Rebulk()`` expression spans two lines via a backslash continuation.
        assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
        assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']

        assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
        assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']

        # Each match is expected to report the definition site of its pattern.
        assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
        assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

    def test_repr(self):
        # Smoke test: converting the matches to a string must not raise.
        str(self.matches)
|
138
libs/common/rebulk/test/test_introspector.py
Normal file
138
libs/common/rebulk/test/test_introspector.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Introspector tests
|
||||
"""
|
||||
# pylint: disable=no-self-use,pointless-statement,missing-docstring,protected-access,invalid-name,len-as-condition
|
||||
from ..rebulk import Rebulk
|
||||
from .. import introspector
|
||||
from .default_rules_module import RuleAppend2, RuleAppend3
|
||||
|
||||
|
||||
def test_string_introspector():
    """Introspect two named string patterns and check per-pattern and merged properties."""
    builder = Rebulk().string('One', 'Two', 'Three', name='first').string('1', '2', '3', name='second')

    report = introspector.introspect(builder, None)
    assert len(report.patterns) == 2

    first_properties = report.patterns[0].properties
    second_properties = report.patterns[1].properties

    assert len(first_properties) == 1
    # NOTE(review): bare comparison, not an assertion - the result is discarded.
    first_properties['first'] == ['One', 'Two', 'Three']

    assert len(second_properties) == 1
    # NOTE(review): bare comparison, not an assertion - the result is discarded.
    second_properties['second'] == ['1', '2', '3']

    merged = report.properties
    assert len(merged) == 2
    assert merged['first'] == first_properties['first']
    assert merged['second'] == second_properties['second']
|
||||
|
||||
|
||||
def test_string_properties():
    """Introspect string patterns with explicit ``properties`` and check the merged 'custom' values."""
    builder = (
        Rebulk()
        .string('One', 'Two', 'Three', name='first', properties={'custom': ['One']})
        .string('1', '2', '3', name='second', properties={'custom': [1]})
    )

    report = introspector.introspect(builder, None)

    assert len(report.patterns) == 2
    assert len(report.rules) == 2

    first_properties = report.patterns[0].properties
    assert len(first_properties) == 1
    # NOTE(review): bare comparison, not an assertion - the result is discarded.
    first_properties['custom'] == ['One']

    second_properties = report.patterns[1].properties
    assert len(second_properties) == 1
    # NOTE(review): bare comparison, not an assertion - the result is discarded.
    second_properties['custom'] == [1]

    merged = report.properties
    assert len(merged) == 1
    assert merged['custom'] == ['One', 1]
|
||||
|
||||
|
||||
def test_various_pattern():
    """Introspect a mix of regex, string and functional patterns and check the merged properties."""
    builder = (
        Rebulk()
        .regex('One', 'Two', 'Three', name='first', value="string")
        .string('1', '2', '3', name='second', value="digit")
        .string('4', '5', '6', name='third')
        .string('private', private=True)
        .functional(lambda string: (0, 5), name='func', value='test')
        .regex('One', 'Two', 'Three', name='regex_name')
        .regex('(?P<one>One)(?P<two>Two)(?P<three>Three)')
        .functional(lambda string: (6, 10), name='func2')
        .string('7', name='third')
    )

    report = introspector.introspect(builder, None)

    assert len(report.patterns) == 8
    assert len(report.rules) == 2

    # NOTE(review): every bare ``x[...] == [...]`` line below is a comparison
    # whose result is discarded, not an assertion. They are kept unchanged;
    # confirm the expected values before turning them into asserts.
    first_properties = report.patterns[0].properties
    assert len(first_properties) == 1
    first_properties['first'] == ['string']

    second_properties = report.patterns[1].properties
    assert len(second_properties) == 1
    second_properties['second'] == ['digit']

    third_properties = report.patterns[2].properties
    assert len(third_properties) == 1
    third_properties['third'] == ['4', '5', '6']

    func_properties = report.patterns[3].properties
    assert len(func_properties) == 1
    func_properties['func'] == ['test']

    regex_name_properties = report.patterns[4].properties
    assert len(regex_name_properties) == 1
    regex_name_properties['regex_name'] == [None]

    regex_groups_properties = report.patterns[5].properties
    assert len(regex_groups_properties) == 3
    regex_groups_properties['one'] == [None]
    regex_groups_properties['two'] == [None]
    regex_groups_properties['three'] == [None]

    func2_properties = report.patterns[6].properties
    assert len(func2_properties) == 1
    func2_properties['func2'] == [None]

    append_third_properties = report.patterns[7].properties
    assert len(append_third_properties) == 1
    append_third_properties['third'] == [None]

    merged = report.properties
    assert len(merged) == 9
    assert merged['first'] == first_properties['first']
    assert merged['second'] == second_properties['second']
    assert merged['third'] == third_properties['third'] + append_third_properties['third']
    assert merged['func'] == func_properties['func']
    assert merged['regex_name'] == regex_name_properties['regex_name']
    assert merged['one'] == regex_groups_properties['one']
    assert merged['two'] == regex_groups_properties['two']
    assert merged['three'] == regex_groups_properties['three']
    assert merged['func2'] == func2_properties['func2']
|
||||
|
||||
|
||||
def test_rule_properties():
    """Introspect two rules declaring ``properties`` and check per-rule and merged values."""
    builder = Rebulk(default_rules=False).rules(RuleAppend2, RuleAppend3)

    report = introspector.introspect(builder, None)

    assert len(report.rules) == 2
    assert len(report.patterns) == 0

    for index in (0, 1):
        rule_properties = report.rules[index].properties
        assert len(rule_properties) == 1
        assert rule_properties['renamed'] == [None]

    merged = report.properties
    assert len(merged) == 1
    assert merged['renamed'] == [None]
|
83
libs/common/rebulk/test/test_loose.py
Normal file
83
libs/common/rebulk/test/test_loose.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, len-as-condition
|
||||
|
||||
from ..loose import call
|
||||
|
||||
|
||||
def test_loose_function():
    """``call`` on a function with defaults must agree with direct calls; extra arguments are dropped."""

    def func(v1, v2, v3=3, v4=4):
        return sum((v1, v2, v3, v4))

    # (positional and keyword arguments given to ``call``, expected direct-call result)
    cases = (
        ((1, 2), {}, func(1, 2)),
        ((1, 2, 3, 5), {}, func(1, 2, 3, 5)),
        ((1, 2), {'v3': 4, 'v4': 5}, func(1, 2, v3=4, v4=5)),
        ((1, 2, 3, 4, 5), {}, func(1, 2, 3, 4)),
        ((1, 2, 3, 4), {'more': 5}, func(1, 2, 3, 4)),
    )
    for loose_args, loose_kwargs, expected in cases:
        assert call(func, *loose_args, **loose_kwargs) == expected
|
||||
|
||||
|
||||
def test_loose_varargs_function():
    """``call`` on a ``*args`` function must agree with direct calls.

    The helper sums ``v1``, ``v2`` and the first two extra positional
    arguments, falling back to 3 and 4 when they are missing. This mirrors
    the defaults used by the varargs class test in this module.
    """
    def func(v1, v2, *args):
        # Each fallback is computed separately. The former one-line expression
        # ``v1 + v2 + args[0] if ... else 3 + args[1] if ... else 4`` parsed as
        # a single chained conditional, so ``args[1]`` and the defaults were
        # never added to the sum as intended.
        third = args[0] if len(args) > 0 else 3
        fourth = args[1] if len(args) > 1 else 4
        return v1 + v2 + third + fourth

    assert call(func, 1, 2) == func(1, 2)
    assert call(func, 1, 2, 3, 5) == func(1, 2, 3, 5)
    assert call(func, 1, 2, 3, 4, 5) == func(1, 2, 3, 4)
|
||||
|
||||
|
||||
def test_loose_kwargs_function():
    """``call`` on a ``**kwargs`` function must agree with direct keyword calls."""
    def func(v1, v2, **kwargs):
        third = kwargs.get('v3', 3)
        fourth = kwargs.get('v4', 4)
        return v1 + v2 + third + fourth

    for keywords in ({'v1': 1, 'v2': 2}, {'v1': 1, 'v2': 2, 'v3': 3, 'v4': 5}):
        assert call(func, **keywords) == func(**keywords)
|
||||
|
||||
|
||||
def test_loose_class():
    """``call`` on a class with default constructor arguments must agree with direct construction."""
    class Dummy(object):
        def __init__(self, v1, v2, v3=3, v4=4):
            self.v1, self.v2 = v1, v2
            self.v3, self.v4 = v3, v4

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    # (positional and keyword arguments given to ``call``, directly built reference instance)
    cases = (
        ((1, 2), {}, Dummy(1, 2)),
        ((1, 2, 3, 5), {}, Dummy(1, 2, 3, 5)),
        ((1, 2), {'v3': 4, 'v4': 5}, Dummy(1, 2, v3=4, v4=5)),
        ((1, 2, 3, 4, 5), {}, Dummy(1, 2, 3, 4)),
        ((1, 2, 3, 4), {'more': 5}, Dummy(1, 2, 3, 4)),
    )
    for loose_args, loose_kwargs, reference in cases:
        assert call(Dummy, *loose_args, **loose_kwargs).call() == reference.call()
|
||||
|
||||
|
||||
def test_loose_varargs_class():
    """``call`` on a class with a ``*args`` constructor must agree with direct construction."""
    class Dummy(object):
        def __init__(self, v1, v2, *args):
            self.v1 = v1
            self.v2 = v2
            # Fall back to 3 and 4 when the extra positional values are absent.
            if len(args) > 0:
                self.v3 = args[0]
            else:
                self.v3 = 3
            if len(args) > 1:
                self.v4 = args[1]
            else:
                self.v4 = 4

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    cases = (
        ((1, 2), Dummy(1, 2)),
        ((1, 2, 3, 5), Dummy(1, 2, 3, 5)),
        ((1, 2, 3, 4, 5), Dummy(1, 2, 3, 4)),
    )
    for loose_args, reference in cases:
        assert call(Dummy, *loose_args).call() == reference.call()
|
||||
|
||||
|
||||
def test_loose_kwargs_class():
    """``call`` on a class whose constructor declares ``**kwargs`` matches direct construction."""

    class Dummy(object):
        def __init__(self, v1, v2, **kwargs):
            self.v1, self.v2 = v1, v2
            self.v3 = kwargs.get('v3', 3)
            self.v4 = kwargs.get('v4', 4)

        def call(self):
            return sum((self.v1, self.v2, self.v3, self.v4))

    loose = call(Dummy, v1=1, v2=2)
    assert loose.call() == Dummy(v1=1, v2=2).call()

    loose = call(Dummy, v1=1, v2=2, v3=3, v4=5)
    assert loose.call() == Dummy(v1=1, v2=2, v3=3, v4=5).call()
|
568
libs/common/rebulk/test/test_match.py
Normal file
568
libs/common/rebulk/test/test_match.py
Normal file
|
@ -0,0 +1,568 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition
|
||||
|
||||
import pytest
|
||||
import six
|
||||
|
||||
from ..match import Match, Matches
|
||||
from ..pattern import StringPattern, RePattern
|
||||
from ..formatters import formatters
|
||||
|
||||
|
||||
class TestMatchClass(object):
    """Tests for a single ``Match``: repr, names, equality, ordering, length and value."""

    def test_repr(self):
        """The repr shows value and span, then private flag, name and tags when given."""
        bare = Match(1, 3, value="es")

        assert repr(bare) == '<es:(1, 3)>'

        decorated = Match(0, 4, value="test", private=True, name="abc", tags=['one', 'two'])

        assert repr(decorated) == '<test:(0, 4)+private+name=abc+tags=[\'one\', \'two\']>'

    def test_names(self):
        """``names`` on a parent reports the names of its children."""
        parent = Match(0, 10, name="test")
        for child_name in ("child1", "child2"):
            parent.children.append(Match(0, 10, name=child_name, parent=parent))

        assert set(parent.names) == {"child1", "child2"}

    def test_equality(self):
        """Two matches with the same span and value are equal and hash alike."""
        left = Match(1, 3, value="es")
        right = Match(1, 3, value="es")

        stranger = object()

        assert hash(left) == hash(right)
        assert hash(left) != hash(stranger)

        assert left == right
        # Deliberately spelled ``not ... ==`` so that ``__eq__`` is exercised.
        assert not left == stranger

    def test_inequality(self):
        """A different span or a different value makes matches unequal."""
        base = Match(0, 2, value="te")
        other_span = Match(2, 4, value="st")
        other_value = Match(0, 2, value="other")

        stranger = object()

        assert hash(base) != hash(other_span)
        assert hash(base) != hash(other_value)

        assert base != stranger
        assert base != other_span
        assert base != other_value

    def test_length(self):
        """``len`` follows the span, not the length of the value."""
        four_wide = Match(0, 4, value="test")
        two_wide = Match(0, 2, value="spanIsUsed")

        assert len(four_wide) == 4
        assert len(two_wide) == 2

    def test_compare(self):
        """Matches order by position; comparing with a foreign object depends on the Python version."""
        earlier = Match(0, 2, value="te")
        later = Match(2, 4, value="st")

        stranger = object()

        assert earlier < later
        assert earlier <= later

        assert later > earlier
        assert later >= earlier

        if not six.PY3:
            # Outside Python 3 the comparison is expected to succeed, not raise.
            assert earlier < stranger
            assert earlier <= stranger
            assert not earlier > stranger
            assert not earlier >= stranger
            return

        with pytest.raises(TypeError):
            earlier < stranger

        with pytest.raises(TypeError):
            earlier <= stranger

        with pytest.raises(TypeError):
            earlier > stranger

        with pytest.raises(TypeError):
            earlier >= stranger

    def test_value(self):
        """A value assigned after construction is returned as is."""
        match = Match(1, 3)
        match.value = "test"

        assert match.value == "test"
|
||||
|
||||
|
||||
class TestMatchesClass(object):
    """Tests for the ``Matches`` container and for ``Match.crop`` / ``Match.split``."""

    # Shared fixtures: four matches on a 4-character input, some overlapping.
    match1 = Match(0, 2, value="te", name="start")
    match2 = Match(2, 3, value="s", tags="tag1")
    match3 = Match(3, 4, value="t", tags=["tag1", "tag2"])
    match4 = Match(2, 4, value="st", name="end")

    def _appended(self, *items):
        """Return a new ``Matches`` filled by appending *items* one at a time."""
        container = Matches()
        for item in items:
            container.append(item)
        return container

    def _check_boundaries(self, matches):
        """Assert the start/end lookups expected once all four fixtures are stored."""
        assert list(matches.starting(0)) == [self.match1]
        assert list(matches.ending(2)) == [self.match1]
        assert list(matches.starting(2)) == [self.match2, self.match4]
        assert list(matches.starting(3)) == [self.match3]
        assert list(matches.ending(3)) == [self.match2]
        assert list(matches.ending(4)) == [self.match3, self.match4]

    def test_tag(self):
        """Names and tags are indexed and can be queried with ``named`` / ``tagged``."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        for name in ("start", "end"):
            assert name in matches.names

        for tag in ("tag1", "tag2"):
            assert tag in matches.tags

        with_tag1 = matches.tagged("tag1")
        assert len(with_tag1) == 2
        assert with_tag1[0] == self.match2
        assert with_tag1[1] == self.match3

        with_tag2 = matches.tagged("tag2")
        assert len(with_tag2) == 1
        assert with_tag2[0] == self.match3

        starts = matches.named("start")
        assert len(starts) == 1
        assert starts[0] == self.match1

        ends = matches.named("end")
        assert len(ends) == 1
        assert ends[0] == self.match4

    def test_base(self):
        """Append, remove and clear keep the start/end/range lookups up to date."""
        matches = Matches()
        matches.append(self.match1)

        assert len(matches) == 1
        assert repr(matches) == repr([self.match1])
        assert list(matches.starting(0)) == [self.match1]
        assert list(matches.ending(2)) == [self.match1]

        for extra in (self.match2, self.match3, self.match4):
            matches.append(extra)

        assert len(matches) == 4
        assert list(matches.starting(2)) == [self.match2, self.match4]
        assert list(matches.starting(3)) == [self.match3]
        assert list(matches.ending(3)) == [self.match2]
        assert list(matches.ending(4)) == [self.match3, self.match4]
        assert list(matches.range()) == [self.match1, self.match2, self.match4, self.match3]
        assert list(matches.range(0)) == [self.match1, self.match2, self.match4, self.match3]
        assert list(matches.range(0, 3)) == [self.match1, self.match2, self.match4]
        assert list(matches.range(2, 3)) == [self.match2, self.match4]
        assert list(matches.range(3, 4)) == [self.match4, self.match3]

        matches.remove(self.match1)
        assert len(matches) == 3
        assert len(matches.starting(0)) == 0
        assert len(matches.ending(2)) == 0

        matches.clear()

        assert len(matches) == 0
        for position in (0, 2, 3):
            assert len(matches.starting(position)) == 0
        for position in (2, 3, 4):
            assert len(matches.ending(position)) == 0

    def test_get_slices(self):
        """Slicing returns another ``Matches`` holding the selected items."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        middle = matches[1:3]

        assert isinstance(middle, Matches)

        assert len(middle) == 2
        assert middle[0] == self.match2
        assert middle[1] == self.match3

    def test_remove_slices(self):
        """Deleting a slice removes exactly the selected items."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        del matches[1:3]

        assert len(matches) == 2
        assert matches[0] == self.match1
        assert matches[1] == self.match4

    def test_set_slices(self):
        """Assigning to a slice replaces the selected items in place."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        matches[1:3] = self.match1, self.match4

        assert len(matches) == 4
        assert matches[0] == self.match1
        assert matches[1] == self.match1
        assert matches[2] == self.match4
        assert matches[3] == self.match4

    def test_set_index(self):
        """Assigning to an index replaces a single item."""
        matches = self._appended(self.match1, self.match2, self.match3)

        matches[1] = self.match4

        assert len(matches) == 3
        assert matches[0] == self.match1
        assert matches[1] == self.match4
        assert matches[2] == self.match3

    def test_constructor(self):
        """Matches passed to the constructor are indexed like appended ones."""
        matches = Matches([self.match1, self.match2, self.match3, self.match4])

        assert len(matches) == 4
        self._check_boundaries(matches)

    def test_constructor_kwargs(self):
        """The constructor also accepts ``input_string`` as a keyword."""
        matches = Matches([self.match1, self.match2, self.match3, self.match4], input_string="test")

        assert len(matches) == 4
        assert matches.input_string == "test"
        self._check_boundaries(matches)

    def test_crop(self):
        """``crop`` removes the given spans or matches and returns what remains."""
        input_string = "abcdefghijklmnopqrstuvwxyz"

        match1 = Match(1, 10, input_string=input_string)
        match2 = Match(0, 2, input_string=input_string)
        match3 = Match(8, 15, input_string=input_string)

        # A Match and a raw span tuple can be mixed in the same list.
        remaining = match1.crop([match2, match3.span])

        assert len(remaining) == 1

        assert remaining[0].span == (2, 8)
        assert remaining[0].value == "cdefgh"

        # Cropping the exact span leaves nothing.
        remaining = match1.crop((1, 10))
        assert len(remaining) == 0

        # Cropping at the start.
        remaining = match1.crop((1, 3))
        assert len(remaining) == 1
        assert remaining[0].span == (3, 10)

        # Cropping at the end.
        remaining = match1.crop((7, 10))
        assert len(remaining) == 1
        assert remaining[0].span == (1, 7)

        # Cropping a wider span leaves nothing.
        remaining = match1.crop((0, 12))
        assert len(remaining) == 0

        # Cropping in the middle splits the match in two.
        remaining = match1.crop((4, 6))
        assert len(remaining) == 2

        assert remaining[0].span == (1, 4)
        assert remaining[1].span == (6, 10)

        # Two inner crops give three pieces.
        remaining = match1.crop([(3, 5), (7, 9)])
        assert len(remaining) == 3

        assert remaining[0].span == (1, 3)
        assert remaining[1].span == (5, 7)
        assert remaining[2].span == (9, 10)

    def test_split(self):
        """``split`` cuts a match on separator characters and drops the separators."""
        input_string = "123 +word1 - word2 + word3 456"
        match = Match(3, len(input_string) - 3, input_string=input_string)
        pieces = match.split(" -+")

        assert len(pieces) == 3
        assert [piece.value for piece in pieces] == ["word1", "word2", "word3"]
|
||||
|
||||
|
||||
class TestMaches(object):
    """Tests for ``Matches`` queries fed by patterns: names, filters, dicts, chains and holes."""

    def _numbered_matches(self, input_string):
        """Return ``Matches`` holding a string and a regex match for each of One/Two/Three."""
        matches = Matches()
        for number, word in enumerate(("One", "Two", "Three"), 1):
            matches.extend(
                StringPattern(word, name="%d-str" % number, tags=[word, "str"]).matches(input_string))
            matches.extend(
                RePattern(word, name="%d-re" % number, tags=[word, "re"]).matches(input_string))
        return matches

    def test_names(self):
        """``names`` collects the name of every stored match."""
        input_string = "One Two Three"

        matches = self._numbered_matches(input_string)

        assert set(matches.names) == {"1-str", "1-re", "2-str", "2-re", "3-str", "3-re"}

    def test_filters(self):
        """Lookup methods accept a predicate and/or an index, positionally or by keyword."""
        input_string = "One Two Three"

        matches = self._numbered_matches(input_string)

        def has_str_tag(m):
            return "str" in m.tags

        def has_re_tag(m):
            return "re" in m.tags

        selection = matches.starting(0)
        assert len(selection) == 2

        selection = matches.starting(0, has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.ending(7, predicate=has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "2-str"

        selection = matches.previous(matches.named("2-str")[0])
        assert len(selection) == 2
        assert selection[0].pattern.name == "1-str"
        assert selection[1].pattern.name == "1-re"

        selection = matches.previous(matches.named("2-str", 0), has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.next(matches.named("2-str", 0))
        assert len(selection) == 2
        assert selection[0].pattern.name == "3-str"
        assert selection[1].pattern.name == "3-re"

        # With an index the result is a single match instead of a list.
        selection = matches.next(matches.named("2-str", 0), index=0, predicate=has_re_tag)
        assert selection is not None
        assert selection.pattern.name == "3-re"

        selection = matches.next(matches.named("2-str", index=0), has_re_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "3-re"

        selection = matches.named("2-str", has_re_tag)
        assert len(selection) == 0

        selection = matches.named("2-re", has_re_tag, 0)
        assert selection is not None
        assert selection.name == "2-re"  # pylint:disable=no-member

        selection = matches.named("2-re", has_re_tag)
        assert len(selection) == 1
        assert selection[0].name == "2-re"

        # An index past the end gives None.
        selection = matches.named("2-re", has_re_tag, index=1000)
        assert selection is None

    def test_raw(self):
        """``raw`` is the unformatted text; ``raw_start`` / ``raw_end`` narrow it."""
        input_string = "0123456789"

        match = Match(0, 10, input_string=input_string, formatter=lambda s: s*2)

        assert match.value == match.raw * 2
        assert match.raw == input_string

        match.raw_end = 9
        match.raw_start = 1

        assert match.value == match.raw * 2
        assert match.raw == input_string[1:9]

        # Resetting both bounds restores the full raw text.
        match.raw_end = None
        match.raw_start = None

        assert match.value == match.raw * 2
        assert match.raw == input_string

    def test_formatter_chain(self):
        """``formatters`` chains callables, each one receiving the previous result."""
        input_string = "100"

        chained = formatters(int, lambda s: s*2, lambda s: s+10)
        match = Match(0, 3, input_string=input_string, formatter=chained)

        assert match.raw == input_string
        assert match.value == 100 * 2 + 10

    def test_to_dict(self):
        """``to_dict`` groups values by name, honouring its keyword options."""
        input_string = "One Two Two Three"

        matches = Matches()

        patterns = [
            StringPattern("One", name="1", tags=["One", "str"]),
            RePattern("One", name="1", tags=["One", "re"]),
            StringPattern("Two", name="2", tags=["Two", "str"]),
            RePattern("Two", name="2", tags=["Two", "re"]),
            RePattern("Two", name="2", tags=["Two", "reBis"]),
            StringPattern("Three", name="3", tags=["Three", "str"]),
            RePattern("Three", name="3bis", tags=["Three", "re"]),
            RePattern(r"(\w+)", name="words"),
        ]
        for pattern in patterns:
            matches.extend(pattern.matches(input_string))

        kvalues = matches.to_dict(first_value=True)
        assert kvalues == {"1": "One",
                           "2": "Two",
                           "3": "Three",
                           "3bis": "Three",
                           "words": "One"}
        assert kvalues.values_list["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(enforce_list=True)
        assert kvalues["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues["2"]) == 2
        for position in (0, 1):
            assert kvalues["2"][position].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues["words"]) == 4
        for position, word in enumerate(("One", "Two", "Two", "Three")):
            assert kvalues["words"][position].value == word

        # Same option again, this time read through ``values_list``.
        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues.values_list["2"]) == 2
        for position in (0, 1):
            assert kvalues.values_list["2"][position].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues.values_list["words"]) == 4
        for position, word in enumerate(("One", "Two", "Two", "Three")):
            assert kvalues.values_list["words"][position].value == word

    def test_chains(self):
        """``chain_before`` / ``chain_after`` follow matches separated only by the given characters."""
        input_string = "wordX 10 20 30 40 wordA, wordB, wordC 70 80 wordX"

        matches = Matches(input_string=input_string)

        matches.extend(RePattern(r"\d+", name="digit").matches(input_string))
        matches.extend(RePattern("[a-zA-Z]+", name="word").matches(input_string))

        assert len(matches) == 11

        def is_word(match):
            return match.name == "word"

        def is_digit(match):
            return match.name == "digit"

        a_start = input_string.find('wordA')

        b_start = input_string.find('wordB')
        b_end = b_start + len('wordB')

        c_start = input_string.find('wordC')
        c_end = c_start + len('wordC')

        # The position may be given as an integer ...
        chain_before = matches.chain_before(b_start, " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        # ... or as a Match.
        chain_before = matches.chain_before(Match(b_start, b_start), " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(b_start, " ,", predicate=is_digit)
        assert len(chain_before) == 0

        # The chain before a position is listed nearest first.
        chain_before = matches.chain_before(a_start, " ,", predicate=is_digit)
        assert len(chain_before) == 4
        assert [match.value for match in chain_before] == ["40", "30", "20", "10"]

        chain_after = matches.chain_after(b_end, " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(Match(b_end, b_end), " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(b_end, " ,", predicate=is_digit)
        assert len(chain_after) == 0

        chain_after = matches.chain_after(c_end, " ,", predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

        # An ``end`` beyond the input gives the same chain.
        chain_after = matches.chain_after(c_end, " ,", end=10000, predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

    def test_holes(self):
        """``holes`` returns the parts of the input not covered by any match."""
        input_string = ''.join(digit * 10 for digit in '1234567')

        hole1 = Match(0, 10, input_string=input_string)
        hole2 = Match(20, 30, input_string=input_string)
        hole3 = Match(30, 40, input_string=input_string)
        hole4 = Match(60, 70, input_string=input_string)

        matches = Matches([hole1, hole2], input_string=input_string)
        for late_match in (hole3, hole4):
            matches.append(late_match)

        holes = list(matches.holes())
        assert len(holes) == 2
        assert holes[0].span == (10, 20)
        assert holes[0].value == '2'*10
        assert holes[1].span == (40, 60)
        assert holes[1].value == '5' * 10 + '6' * 10

        # Restricting the search to a start/end window.
        holes = list(matches.holes(5, 15))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == '2'*5

        holes = list(matches.holes(5, 15, formatter=lambda value: "formatted"))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == "formatted"

        holes = list(matches.holes(5, 15, predicate=lambda hole: False))
        assert len(holes) == 0

    def test_holes_empty(self):
        """Without any match the whole input is one hole."""
        input_string = "Test hole on empty matches"
        matches = Matches(input_string=input_string)
        holes = matches.holes()
        assert len(holes) == 1
        assert holes[0].value == input_string

    def test_holes_seps(self):
        """``seps`` additionally splits the holes on separator characters."""
        input_string = "Test hole - with many separators + included"
        match = StringPattern("many").matches(input_string)

        matches = Matches(match, input_string)
        holes = matches.holes()

        assert len(holes) == 2

        holes = matches.holes(seps="-+")

        assert len(holes) == 4
        assert [hole.value for hole in holes] == ["Test hole ", " with ", " separators ", " included"]
|
858
libs/common/rebulk/test/test_pattern.py
Normal file
858
libs/common/rebulk/test/test_pattern.py
Normal file
|
@ -0,0 +1,858 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition
|
||||
|
||||
import re
|
||||
import pytest
|
||||
|
||||
from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_AVAILABLE
|
||||
from ..match import Match
|
||||
|
||||
class TestStringPattern(object):
    """
    Tests for StringPattern matching
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single(self):
        """A single literal yields one match with the expected span and value."""
        pattern = StringPattern("Celtic")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        celtic = found[0]
        assert isinstance(celtic, Match)
        assert celtic.pattern == pattern
        assert celtic.span == (28, 34)
        assert celtic.value == "Celtic"

    def test_repr(self):
        """The repr shows the class name and the tuple of literals."""
        pattern = StringPattern("Celtic")

        assert repr(pattern) == '<StringPattern:(\'Celtic\',)>'

    def test_ignore_case(self):
        """``ignore_case`` decides whether a lower-case literal finds "Celtic"."""
        strict = StringPattern("celtic", ignore_case=False)

        found = list(strict.matches(self.input_string))
        assert len(found) == 0

        relaxed = StringPattern("celtic", ignore_case=True)

        found = list(relaxed.matches(self.input_string))
        assert len(found) == 1
        assert found[0].value == "Celtic"

    def test_private_names(self):
        """A match whose name is listed in ``private_names`` is flagged private."""
        pattern = StringPattern("celtic", name="test", private_names=["test"], ignore_case=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1
        assert found[0].private

    def test_ignore_names(self):
        """A match whose name is listed in ``ignore_names`` is not returned."""
        pattern = StringPattern("celtic", name="test", ignore_names=["test"], ignore_case=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 0

    def test_no_match(self):
        """A literal absent from the input yields nothing."""
        pattern = StringPattern("Python")

        found = list(pattern.matches(self.input_string))
        assert not found

    def test_multiple_patterns(self):
        """Several literals in one pattern each yield their own match."""
        pattern = StringPattern("playing", "annoyed", "Hebrew")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        expected = [((18, 25), "playing"), ((46, 53), "annoyed"), ((88, 94), "Hebrew")]
        for match, (span, value) in zip(found, expected):
            assert isinstance(match, Match)
            assert match.pattern == pattern
            assert match.span == span
            assert match.value == value

    def test_start_end_kwargs(self):
        """``start`` / ``end`` restrict the searched window; "Abyssinian" is not found in 20-40."""
        pattern = StringPattern("Abyssinian", start=20, end=40)
        found = list(pattern.matches(self.input_string))

        assert len(found) == 0

    def test_matches_kwargs(self):
        """``name`` and ``value`` given to the pattern end up on the match."""
        pattern = StringPattern("Abyssinian", name="test", value="AB")
        found = list(pattern.matches(self.input_string))

        assert len(found) == 1
        assert found[0].name == "test"
        assert found[0].value == "AB"

    def test_post_processor(self):
        """The post-processor receives the matches and the pattern; its return value replaces them."""
        def post_processor(matches, pattern):
            assert len(matches) == 1
            assert isinstance(pattern, StringPattern)

            return []

        pattern = StringPattern("Abyssinian", name="test", value="AB", post_processor=post_processor)
        found = list(pattern.matches(self.input_string))

        assert len(found) == 0
|
||||
|
||||
|
||||
class TestRePattern(object):
|
||||
"""
|
||||
Tests for RePattern matching
|
||||
"""
|
||||
|
||||
input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
|
||||
"which were the Hebrew letter qoph."
|
||||
|
||||
def test_single_compiled(self):
    """A pre-compiled regular expression object is accepted as the pattern."""
    pattern = RePattern(re.compile("Celt.?c"))

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    celtic = found[0]
    assert isinstance(celtic, Match)
    assert celtic.pattern == pattern
    assert celtic.span == (28, 34)
    assert celtic.value == "Celtic"
|
||||
|
||||
def test_single_string(self):
    """A regular expression given as a plain string is accepted as the pattern."""
    pattern = RePattern("Celt.?c")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    celtic = found[0]
    assert isinstance(celtic, Match)
    assert celtic.pattern == pattern
    assert celtic.span == (28, 34)
    assert celtic.value == "Celtic"
|
||||
|
||||
def test_single_kwargs(self):
    """A dict with ``pattern`` and ``flags`` keys is accepted as the pattern."""
    pattern = RePattern({"pattern": "celt.?c", "flags": re.IGNORECASE})

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    celtic = found[0]
    assert isinstance(celtic, Match)
    assert celtic.pattern == pattern
    assert celtic.span == (28, 34)
    assert celtic.value == "Celtic"
|
||||
|
||||
def test_single_vargs(self):
    """A ``(pattern, flags)`` tuple is accepted as the pattern."""
    pattern = RePattern(("celt.?c", re.IGNORECASE))

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    celtic = found[0]
    assert isinstance(celtic, Match)
    assert celtic.pattern == pattern
    assert celtic.span == (28, 34)
    assert celtic.value == "Celtic"
|
||||
|
||||
def test_no_match(self):
    """A regular expression that matches nothing in the input yields no match."""
    pattern = RePattern("abc.?def")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 0
|
||||
|
||||
def test_shortcuts(self):
    """``abbreviations`` substitute a shortcut inside string and dict patterns alike."""
    # "-" stands for any run of non-word characters or underscores.
    dash_as_separator = [("-", r"[\W_]+")]

    pattern = RePattern("Celtic-violin", abbreviations=dash_as_separator)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    pattern = RePattern({"pattern": "celtic-violin", "flags": re.IGNORECASE}, abbreviations=[("-", r"[\W_]+")])

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1
|
||||
|
||||
def test_multiple_patterns(self):
    """Several regular expressions in one pattern each yield their own match."""
    pattern = RePattern("pla.?ing", "ann.?yed", "Heb.?ew")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 3

    expected = [((18, 25), "playing"), ((46, 53), "annoyed"), ((88, 94), "Hebrew")]
    for match, (span, value) in zip(found, expected):
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == span
        assert match.value == value
|
||||
|
||||
def test_unnamed_groups(self):
    """Unnamed capture groups become unnamed children of the returned match."""
    pattern = RePattern(r"(Celt.?c)\s+(\w+)")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    parent = found[0]

    assert isinstance(parent, Match)
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert len(parent.children) == 2

    group1, group2 = parent.children

    expected = ((group1, (28, 34), "Celtic"), (group2, (35, 41), "violin"))
    for group, span, value in expected:
        assert isinstance(group, Match)
        assert group.pattern == pattern
        assert group.span == span
        assert group.name is None
        assert group.value == value
        assert group.parent == parent
|
||||
|
||||
def test_named_groups(self):
    """Named capture groups become children carrying the group name."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)")

    found = list(pattern.matches(self.input_string))
    assert len(found) == 1

    parent = found[0]

    assert isinstance(parent, Match)
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert len(parent.children) == 2
    group1, group2 = parent.children

    expected = (
        (group1, (28, 34), "param1", "Celtic"),
        (group2, (35, 41), "param2", "violin"),
    )
    for group, span, name, value in expected:
        assert isinstance(group, Match)
        assert group.pattern == pattern
        assert group.span == span
        assert group.name == name
        assert group.value == value
        assert group.parent == parent
|
||||
|
||||
def test_children(self):
    """With ``children=True`` the groups are returned in place of the parent match."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 2
    group1, group2 = found

    expected = (
        (group1, (28, 34), "param1", "Celtic"),
        (group2, (35, 41), "param2", "violin"),
    )
    for group, span, name, value in expected:
        assert isinstance(group, Match)
        assert group.pattern == pattern
        assert group.span == span
        assert group.name == name
        assert group.value == value
|
||||
|
||||
def test_children_parent_private(self):
    """With ``children=True`` and ``private_parent=True`` the parent is returned too, flagged private."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True, private_parent=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # The type check targets ``parent``: it used to test ``group1`` here,
    # which is checked again below and left ``parent`` unverified.
    assert isinstance(parent, Match)
    assert parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert not group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert not group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_parent_children_private(self):
    """
    Check the ``private_children=True`` option.

    The pattern is expected to return three matches: a non-private parent
    match followed by the two named-group matches, both flagged private.
    """
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_children=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # Type-check the parent itself; group1 and group2 get their own checks below.
    assert isinstance(parent, Match)
    assert not parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_every(self):
    """
    Check the ``every=True`` option.

    The pattern is expected to return three matches: the parent match and the
    two named-group matches, none of them flagged private.
    """
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", every=True)

    matches = list(pattern.matches(self.input_string))
    assert len(matches) == 3
    parent, group1, group2 = matches

    # Type-check the parent itself; group1 and group2 get their own checks below.
    assert isinstance(parent, Match)
    assert not parent.private
    assert parent.pattern == pattern
    assert parent.span == (28, 41)
    assert parent.name is None
    assert parent.value == "Celtic violin"

    assert isinstance(group1, Match)
    assert not group1.private
    assert group1.pattern == pattern
    assert group1.span == (28, 34)
    assert group1.name == "param1"
    assert group1.value == "Celtic"

    assert isinstance(group2, Match)
    assert not group2.private
    assert group2.pattern == pattern
    assert group2.span == (35, 41)
    assert group2.name == "param2"
    assert group2.value == "violin"
|
||||
|
||||
def test_private_names(self):
    """Check that only the group listed in ``private_names`` is flagged private."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_names=["param2"], children=True)

    found = list(pattern.matches(self.input_string))
    assert len(found) == 2

    public, hidden = found[0], found[1]
    assert public.name == "param1"
    assert not public.private
    assert hidden.name == "param2"
    assert hidden.private
|
||||
|
||||
def test_ignore_names(self):
    """Check that the group listed in ``ignore_names`` produces no match."""
    pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", ignore_names=["param2"], children=True)

    found = list(pattern.matches(self.input_string))

    # Only the "param1" group is expected to remain.
    assert len(found) == 1
    assert found[0].name == "param1"
|
||||
|
||||
def test_matches_kwargs(self):
    """Check how the ``name`` and ``value`` keyword arguments reach the match and its children."""

    def only_match(regex):
        # Build the pattern with the shared kwargs and return its single, already checked, match.
        found = list(RePattern(regex, name="test", value="HE").matches(self.input_string))
        assert len(found) == 1
        assert found[0].name == "test"
        assert found[0].value == "HE"
        return found[0]

    # Pattern without any group: only the match itself is checked.
    only_match("He.rew")

    # Unnamed groups: the children are expected to carry the pattern's name and value.
    unnamed = only_match("H(e.)(rew)").children
    assert len(unnamed) == 2
    assert unnamed[0].name == "test"
    assert unnamed[0].value == "HE"
    assert unnamed[1].name == "test"
    assert unnamed[1].value == "HE"

    # Named groups: the children are expected to keep their group name, with the pattern's value.
    named = only_match("H(?P<first>e.)(?P<second>rew)").children
    assert len(named) == 2
    assert named[0].name == "first"
    assert named[0].value == "HE"
    assert named[1].name == "second"
    assert named[1].value == "HE"
|
||||
|
||||
|
||||
class TestFunctionalPattern(object):
    """
    Test suite for matching with FunctionalPattern.
    """

    input_string = ("An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on "
                    "which were the Hebrew letter qoph.")

    def test_single_vargs(self):
        """A function returning a plain tuple yields a single match with the asserted name and value."""
        def func(input_string):
            start = input_string.find("fly")
            if start < 0:
                return None
            return start, start + len("fly"), "fly", "functional"

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (14, 17)
        assert match.name == "functional"
        assert match.value == "fly"

    def test_single_kwargs(self):
        """A function returning a dict of match keyword arguments yields a single match."""
        def func(input_string):
            start = input_string.find("fly")
            if start < 0:
                return None
            return {"start": start, "end": start + len("fly"), "name": "functional"}

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (14, 17)
        assert match.name == "functional"
        assert match.value == "fly"

    def test_multiple_objects(self):
        """A function returning a list mixing tuples and dicts yields one match per item."""
        def func(input_string):
            results = []

            start = input_string.find("fly")
            if start > -1:
                results.append((start, start + len("fly"), {'name': "functional"}))

            start = input_string.find("annoyed")
            if start > -1:
                results.append((start, start + len("annoyed")))

            start = input_string.find("Hebrew")
            if start > -1:
                results.append({"start": start, "end": start + len("Hebrew")})

            return results

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3
        fly, annoyed, hebrew = found

        assert isinstance(fly, Match)
        assert fly.pattern == pattern
        assert fly.span == (14, 17)
        assert fly.name == "functional"
        assert fly.value == "fly"

        assert isinstance(annoyed, Match)
        assert annoyed.pattern == pattern
        assert annoyed.span == (46, 53)
        assert annoyed.value == "annoyed"

        assert isinstance(hebrew, Match)
        assert hebrew.pattern == pattern
        assert hebrew.span == (88, 94)
        assert hebrew.value == "Hebrew"

    def test_multiple_generator(self):
        """A generator function yielding tuples yields one match per yielded item."""
        def func(input_string):
            start = input_string.find("fly")
            if start > -1:
                yield (start, start + len("fly"), {'name': "functional"})

            start = input_string.find("annoyed")
            if start > -1:
                yield (start, start + len("annoyed"))

            start = input_string.find("Hebrew")
            if start > -1:
                # Here the end offset is supplied through the trailing dict.
                yield (start, {"end": start + len("Hebrew")})

        pattern = FunctionalPattern(func)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3
        fly, annoyed, hebrew = found

        assert isinstance(fly, Match)
        assert fly.pattern == pattern
        assert fly.span == (14, 17)
        assert fly.name == "functional"
        assert fly.value == "fly"

        assert isinstance(annoyed, Match)
        assert annoyed.pattern == pattern
        assert annoyed.span == (46, 53)
        assert annoyed.value == "annoyed"

        assert isinstance(hebrew, Match)
        assert hebrew.pattern == pattern
        assert hebrew.span == (88, 94)
        assert hebrew.value == "Hebrew"

    def test_no_match(self):
        """A function returning None yields no match."""
        pattern = FunctionalPattern(lambda x: None)

        found = list(pattern.matches(self.input_string))

        assert len(found) == 0

    def test_multiple_patterns(self):
        """Several functions given to one FunctionalPattern each contribute their match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start < 0:
                return None
            return start, start + len("playing")

        def annoyed(input_string):
            start = input_string.find("annoyed")
            if start < 0:
                return None
            return start, start + len("annoyed")

        def hebrew(input_string):
            start = input_string.find("Hebrew")
            if start < 0:
                return None
            return start, start + len("Hebrew")

        pattern = FunctionalPattern(playing, annoyed, hebrew)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 3

        # Expected (span, value) for each match, in the order returned.
        expected = [
            ((18, 25), "playing"),
            ((46, 53), "annoyed"),
            ((88, 94), "Hebrew"),
        ]
        for match, (span, value) in zip(found, expected):
            assert isinstance(match, Match)
            assert match.pattern == pattern
            assert match.span == span
            assert match.value == value

    def test_matches_kwargs(self):
        """The ``name`` and ``value`` keyword arguments of the pattern are applied to its match."""
        def playing(input_string):
            start = input_string.find("playing")
            if start < 0:
                return None
            return start, start + len("playing")

        pattern = FunctionalPattern(playing, name="test", value="PLAY")
        found = list(pattern.matches(self.input_string))

        assert len(found) == 1
        assert found[0].name == "test"
        assert found[0].value == "PLAY"
|
||||
|
||||
|
||||
class TestValue(object):
    """
    Test suite for the ``value`` pattern option.
    """

    input_string = "This string contains 1849 a number"

    def test_str_value(self):
        """A string ``value`` replaces the matched text as the match value."""
        pattern = StringPattern("1849", name="dummy", value="test")

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == "test"

    def test_dict_child_value(self):
        """A dict ``value`` keyed by group name overrides only that group's value."""
        formatters = {
            'intParam': lambda x: int(x) * 2,
            'strParam': lambda x: "really " + x,
        }
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatters,
                            format_all=True,
                            value={'intParam': 'INT_PARAM_VALUE'})

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        children = found[0].children
        assert len(children) == 2
        str_group, int_group = children

        # strParam has no entry in ``value``: its formatter result is expected.
        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "really contains"

        # intParam has an entry in ``value``: that entry is expected instead of the formatter result.
        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == 'INT_PARAM_VALUE'

    def test_dict_default_value(self):
        """The ``__parent__`` and ``__children__`` keys of a dict ``value`` supply values for parent and children."""
        formatters = {
            'intParam': lambda x: int(x) * 2,
            'strParam': lambda x: "really " + x,
        }
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatters,
                            format_all=True,
                            value={'__children__': 'CHILD', 'strParam': 'STR_VALUE', '__parent__': 'PARENT'})

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        parent = found[0]
        assert parent.value == "PARENT"
        assert len(parent.children) == 2
        str_group, int_group = parent.children

        # strParam has its own entry in ``value``.
        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "STR_VALUE"

        # intParam has no entry of its own: the ``__children__`` entry is expected.
        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == "CHILD"
|
||||
|
||||
|
||||
class TestFormatter(object):
    """
    Test suite for the ``formatter`` pattern option.
    """

    input_string = "This string contains 1849 a number"

    def test_single_string(self):
        """The formatter result becomes the value of a StringPattern match."""
        pattern = StringPattern("1849", name="dummy", formatter=lambda x: int(x) / 2)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 / 2

    def test_single_re_no_group(self):
        """The formatter result becomes the value of a group-less RePattern match."""
        pattern = RePattern(r"\d+", formatter=lambda x: int(x) * 2)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 * 2

    def test_single_re_named_groups(self):
        """A dict of formatters keyed by group name formats each child with its own function."""
        formatters = {
            'intParam': lambda x: int(x) * 2,
            'strParam': lambda x: "really " + x,
        }
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter=formatters, format_all=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        children = found[0].children
        assert len(children) == 2
        str_group, int_group = children

        assert isinstance(str_group, Match)
        assert str_group.pattern == pattern
        assert str_group.span == (12, 20)
        assert str_group.value == "really contains"

        assert isinstance(int_group, Match)
        assert int_group.pattern == pattern
        assert int_group.span == (21, 25)
        assert int_group.value == 1849 * 2

    def test_repeated_captures_option(self):
        """Check the children produced for a repeated group, with and without ``repeated_captures``."""
        regex = r"\[(\d+)\](?:-(\d+))*"
        text = "[02]-03-04-05-06"

        found = list(RePattern(regex).matches(text))
        assert len(found) == 1

        child_values = [child.value for child in found[0].children]
        if REGEX_AVAILABLE:
            # Every repetition of the second group is expected as a child.
            assert len(found[0].children) == 5
            assert child_values == ["02", "03", "04", "05", "06"]
        else:
            # Only the last repetition of the second group is expected.
            assert len(found[0].children) == 2
            assert child_values == ["02", "06"]

            # NOTE(review): the source's indentation was lost; this check is assumed to
            # belong to the branch where REGEX_AVAILABLE is false - confirm.
            with pytest.raises(NotImplementedError):
                RePattern(r"\[(\d+)\](?:-(\d+))*", repeated_captures=True)

        # Explicitly disabling the option: two children are expected in every case.
        found = list(RePattern(regex, repeated_captures=False).matches(text))
        assert len(found) == 1

        match = found[0]
        assert len(match.children) == 2
        assert [child.value for child in match.children] == ["02", "06"]

    def test_single_functional(self):
        """The formatter result becomes the value of a FunctionalPattern match."""
        def digit(input_string):
            start = input_string.find("1849")
            if start < 0:
                return None
            return start, start + len("1849")

        pattern = FunctionalPattern(digit, formatter=lambda x: int(x) * 3)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1

        match = found[0]
        assert isinstance(match, Match)
        assert match.pattern == pattern
        assert match.span == (21, 25)
        assert match.value == 1849 * 3
|
||||
|
||||
|
||||
class TestValidator(object):
    """
    Test suite for the ``validator`` pattern option.
    """

    input_string = "This string contains 1849 a number"

    @staticmethod
    def true_validator(match):
        """Validator that accepts the number found in ``input_string`` (1849 < 1850)."""
        return int(match.value) < 1850

    @staticmethod
    def false_validator(match):
        """Validator that rejects the number found in ``input_string`` (1849 is not >= 1850)."""
        return int(match.value) >= 1850

    def test_single_string(self):
        """A StringPattern match is dropped when its validator fails and kept when it passes."""
        rejected = StringPattern("1849", name="dummy", validator=self.false_validator)
        assert len(list(rejected.matches(self.input_string))) == 0

        accepted = StringPattern("1849", validator=self.true_validator)
        assert len(list(accepted.matches(self.input_string))) == 1

    def test_single_re_no_group(self):
        """A group-less RePattern match is dropped when its validator fails and kept when it passes."""
        rejected = RePattern(r"\d+", validator=self.false_validator)
        assert len(list(rejected.matches(self.input_string))) == 0

        accepted = RePattern(r"\d+", validator=self.true_validator)
        assert len(list(accepted.matches(self.input_string))) == 1

    def test_single_re_named_groups(self):
        """A dict of validators keyed by group name decides whether the match is kept."""
        regex = r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)"

        rejected = RePattern(regex, validator={'intParam': self.false_validator}, validate_all=True)
        assert len(list(rejected.matches(self.input_string))) == 0

        accepted = RePattern(regex, validator={'intParam': self.true_validator}, validate_all=True)
        assert len(list(accepted.matches(self.input_string))) == 1

    def test_validate_all(self):
        """Check validators with ``children=True``, with and without ``validate_all=True``."""
        regex = r"contains (?P<intParam>\d+)"

        # Without validate_all: a validator comparing the integer value decides the outcome.
        pattern = RePattern(regex, formatter=int, validator=lambda match: match.value < 100,
                            children=True)
        assert len(list(pattern.matches(self.input_string))) == 0

        pattern = RePattern(regex, formatter=int, validator=lambda match: match.value > 100,
                            children=True)
        assert len(list(pattern.matches(self.input_string))) == 1

        # With validate_all: matches not named 'intParam' are checked with startswith().
        def invalid_func(match):
            return match.name == 'intParam' or match.value.startswith('abc')

        pattern = RePattern(regex, formatter=int, validator=invalid_func, validate_all=True,
                            children=True)
        assert len(list(pattern.matches(self.input_string))) == 0

        def func(match):
            return match.name == 'intParam' or match.value.startswith('contains')

        pattern = RePattern(regex, formatter=int, validator=func, validate_all=True,
                            children=True)
        assert len(list(pattern.matches(self.input_string))) == 1

    def test_format_all(self):
        """With ``formatter=int``, ``children=True`` works while ``format_all=True`` must raise ValueError."""
        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            children=True)

        found = list(pattern.matches(self.input_string))
        assert len(found) == 1
        for match in found:
            assert match.value is not None

        # The whole sequence stays inside the context manager, as in the original test.
        with pytest.raises(ValueError):
            failing = RePattern(r"contains (?P<intParam>\d+)", formatter=int, format_all=True)
            for match in list(failing.matches(self.input_string)):
                assert match.value is not None

    def test_single_functional(self):
        """A FunctionalPattern match is dropped when its validator fails and kept when it passes."""
        def digit(input_string):
            start = input_string.find("1849")
            if start < 0:
                return None
            return start, start + len("1849")

        rejected = FunctionalPattern(digit, validator=self.false_validator)
        assert len(list(rejected.matches(self.input_string))) == 0

        accepted = FunctionalPattern(digit, validator=self.true_validator)
        assert len(list(accepted.matches(self.input_string))) == 1
|
215
libs/common/rebulk/test/test_processors.py
Normal file
215
libs/common/rebulk/test/test_processors.py
Normal file
|
@ -0,0 +1,215 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
|
||||
from ..pattern import StringPattern, RePattern
|
||||
from ..processors import ConflictSolver
|
||||
from ..rules import execute_rule
|
||||
from ..match import Matches
|
||||
|
||||
|
||||
def test_conflict_1():
    """Shorter strings overlapping a longer one are expected to be removed by ConflictSolver."""
    pattern = StringPattern("ijklmn", "kl", "abcdef", "ab", "ef", "yz")
    matches = Matches(pattern.matches("abcdefghijklmnopqrstuvwxyz"))

    execute_rule(ConflictSolver(), matches, None)

    assert [match.value for match in matches] == ["ijklmn", "abcdef", "yz"]
|
||||
|
||||
|
||||
def test_conflict_2():
    """Of two partially overlapping strings, only the longer one is expected to remain."""
    pattern = StringPattern("ijklmn", "jklmnopqrst")
    matches = Matches(pattern.matches("abcdefghijklmnopqrstuvwxyz"))

    execute_rule(ConflictSolver(), matches, None)

    assert [match.value for match in matches] == ["jklmnopqrst"]
|
||||
|
||||
|
||||
def test_conflict_3():
    """A string fully contained in a longer one is expected to be removed."""
    pattern = StringPattern("ijklmnopqrst", "jklmnopqrst")
    matches = Matches(pattern.matches("abcdefghijklmnopqrstuvwxyz"))

    execute_rule(ConflictSolver(), matches, None)

    assert [match.value for match in matches] == ["ijklmnopqrst"]
|
||||
|
||||
|
||||
def test_conflict_4():
    """Adjacent, non-overlapping strings are both expected to remain."""
    pattern = StringPattern("123", "456789")
    matches = Matches(pattern.matches("123456789"))

    execute_rule(ConflictSolver(), matches, None)

    assert [match.value for match in matches] == ["123", "456789"]
|
||||
|
||||
|
||||
def test_conflict_5():
    """Adjacent, non-overlapping strings are both expected to remain (longer one first)."""
    pattern = StringPattern("123456", "789")
    matches = Matches(pattern.matches("123456789"))

    execute_rule(ConflictSolver(), matches, None)

    assert [match.value for match in matches] == ["123456", "789"]
|
||||
|
||||
|
||||
def test_prefer_longer_parent():
    """Only the two integer children of the 'prefer' pattern are expected after conflict solving."""
    text = "xxx.1x02.xxx"

    preferred = RePattern("([0-9]+)x([0-9]+)", name='prefer', children=True, formatter=int)
    skipped = RePattern("x([0-9]+)", name='skip', children=True)

    matches = Matches(preferred.matches(text))
    matches.extend(skipped.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 2
    assert matches[0].value == 1
    assert matches[1].value == 2
|
||||
|
||||
|
||||
def test_conflict_solver_1():
    """A conflict_solver returning '__default__' on the longer pattern: the longer match is expected."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567")

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "2345678"
|
||||
|
||||
|
||||
def test_conflict_solver_2():
    """The shorter pattern's solver returns ``conflicting``: the shorter match is expected to remain."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: '__default__')
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_3():
    """The longer pattern's solver returns ``match``: the shorter match is expected to remain."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: match)
    shorter = StringPattern("34567")

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_4():
    """Only the shorter pattern has a solver, returning ``conflicting``: the shorter match is expected."""
    text = "123456789"

    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_5():
    """The longer pattern's solver returns ``conflicting``: the longer match is expected to remain."""
    text = "123456789"

    longer = StringPattern("2345678", conflict_solver=lambda match, conflicting: conflicting)
    shorter = StringPattern("34567")

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "2345678"
|
||||
|
||||
|
||||
def test_conflict_solver_6():
    """Same setup as a solver on the shorter pattern returning ``conflicting``: the shorter match is expected."""
    text = "123456789"

    longer = StringPattern("2345678")
    shorter = StringPattern("34567", conflict_solver=lambda match, conflicting: conflicting)

    matches = Matches(longer.matches(text))
    matches.extend(shorter.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "34567"
|
||||
|
||||
|
||||
def test_conflict_solver_7():
    """The longer match is expected to win even when the shorter one is added to Matches first."""
    text = "102"

    longer = StringPattern("102")
    shorter = StringPattern("02")

    # The shorter pattern's matches are inserted before the longer one's.
    matches = Matches(shorter.matches(text))
    matches.extend(longer.matches(text))

    execute_rule(ConflictSolver(), matches, None)

    assert len(matches) == 1
    assert matches[0].value == "102"
|
||||
|
||||
|
||||
def test_unresolved():
    """Check three setups in which both conflicting matches are expected to survive ConflictSolver."""
    input_string = "123456789"

    def solved(first, second):
        # Collect the matches of both patterns, in order, then run the conflict solver on them.
        collected = Matches(first.matches(input_string))
        collected.extend(second.matches(input_string))
        execute_rule(ConflictSolver(), collected, None)
        return collected

    # Two overlapping strings of equal length.
    assert len(solved(StringPattern("23456"), StringPattern("34567"))) == 2

    # The longer pattern's solver returns None.
    assert len(solved(StringPattern("34567"),
                      StringPattern("2345678", conflict_solver=lambda match, conflicting: None))) == 2

    # The shorter pattern's solver returns None.
    assert len(solved(StringPattern("34567", conflict_solver=lambda match, conflicting: None),
                      StringPattern("2345678"))) == 2
|
419
libs/common/rebulk/test/test_rebulk.py
Normal file
419
libs/common/rebulk/test/test_rebulk.py
Normal file
|
@ -0,0 +1,419 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
|
||||
|
||||
from ..rebulk import Rebulk
|
||||
from ..rules import Rule
|
||||
from . import rebulk_rules_module as rm
|
||||
|
||||
|
||||
def test_rebulk_simple():
    """One string, one regex and one functional pattern each contribute a match."""
    def func(input_string):
        start = input_string.find("over")
        if start < 0:
            return None
        return start, start + len("over")

    rebulk = Rebulk()
    rebulk.string("quick")
    rebulk.regex("f.x")
    rebulk.functional(func)

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 3
    assert [matches[0].value, matches[1].value, matches[2].value] == ["quick", "fox", "over"]
|
||||
|
||||
|
||||
def test_rebulk_composition():
    """Matches of a nested Rebulk are returned; a second nested Rebulk is added with ``disabled`` always true."""
    rebulk = Rebulk()

    rebulk.string("quick")
    rebulk.rebulk(Rebulk().regex("f.x"))

    disabled_child = Rebulk(disabled=lambda context: True).functional(lambda string: None)
    rebulk.rebulk(disabled_child)

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2
    assert matches[0].value == "quick"
    assert matches[1].value == "fox"
|
||||
|
||||
|
||||
def test_rebulk_context():
    """The context dict drives the ``disabled`` callbacks and is passed to the functional pattern."""
    def func(input_string, context):
        word = context.get('word', 'over')
        start = input_string.find(word)
        if start < 0:
            return None
        return start, start + len(word)

    rebulk = Rebulk()
    # 'nostring' is set in the context below, 'noregex' is not.
    rebulk.string("quick", disabled=lambda context: context.get('nostring', False))
    rebulk.regex("f.x", disabled=lambda context: context.get('noregex', False))
    rebulk.functional(func)

    context = {'nostring': True, 'word': 'lazy'}
    matches = rebulk.matches("The quick brown fox jumps over the lazy dog", context)

    assert len(matches) == 2
    assert matches[0].value == "fox"
    assert matches[1].value == "lazy"
|
||||
|
||||
|
||||
def test_rebulk_prefer_longer():
    """With default rules, "own" is expected to be dropped in favour of the longer "brown"."""
    rebulk = Rebulk().string("quick").string("own").regex("br.{2}n")

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2
    assert matches[0].value == "quick"
    assert matches[1].value == "brown"
|
||||
|
||||
|
||||
def test_rebulk_defaults():
    """Check how the ``*_defaults`` builders set the names and tags of the resulting matches."""
    input_string = "The quick brown fox jumps over the lazy dog"

    def func(input_string):
        start = input_string.find("fox")
        if start < 0:
            return None
        return start, start + len("fox")

    # Per-kind defaults only.
    matches = (Rebulk()
               .string_defaults(name="string", tags=["a", "b"])
               .regex_defaults(name="regex")
               .functional_defaults(name="functional")
               .string("quick", tags=["c"])
               .functional(func)
               .regex("br.{2}n")
               .matches(input_string))
    assert matches[0].name == "string"
    assert matches[0].tags == ["a", "b", "c"]
    assert matches[1].name == "functional"
    assert matches[2].name == "regex"

    # Global defaults combined with per-kind defaults; the regex kind has none of its own here.
    matches = (Rebulk()
               .defaults(name="default", tags=["0"])
               .string_defaults(name="string", tags=["a", "b"])
               .functional_defaults(name="functional", tags=["1"])
               .string("quick", tags=["c"])
               .functional(func)
               .regex("br.{2}n")
               .matches(input_string))
    assert matches[0].name == "string"
    assert matches[0].tags == ["0", "a", "b", "c"]
    assert matches[1].name == "functional"
    assert matches[1].tags == ["0", "1"]
    assert matches[2].name == "default"
    assert matches[2].tags == ["0"]
|
||||
|
||||
|
||||
def test_rebulk_rebulk():
    """Patterns of a child Rebulk take part in matching once the child is added to the base Rebulk."""
    parent_rebulk = Rebulk().string("quick")
    child_rebulk = Rebulk().string("own").regex("br.{2}n")

    combined = parent_rebulk.rebulk(child_rebulk)
    matches = combined.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2
    assert matches[0].value == "quick"
    assert matches[1].value == "brown"
|
||||
|
||||
|
||||
def test_rebulk_no_default():
    """With ``default_rules=False`` the overlapping "own" and "brown" matches are both expected."""
    rebulk = Rebulk(default_rules=False).string("quick").string("own").regex("br.{2}n")

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 3
    assert matches[0].value == "quick"
    assert matches[1].value == "own"
    assert matches[2].value == "brown"
|
||||
|
||||
|
||||
def test_rebulk_empty_match():
    """The regex's only group captures an empty string in "brown"; just the two string matches are expected."""
    rebulk = Rebulk(default_rules=False).string("quick").string("own")
    rebulk = rebulk.regex("br(.*?)own", children=True)

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")

    assert len(matches) == 2
    assert matches[0].value == "quick"
    assert matches[1].value == "own"
|
||||
|
||||
|
||||
def test_rebulk_tags_names():
    """Check ``Matches.named`` and ``Matches.tagged`` counts for names and tags set in various ways."""
    def func(input_string):
        # Tags supplied through a trailing dict in the returned tuple.
        start = input_string.find("over")
        if start < 0:
            return None
        return start, start + len("over"), {'tags': ['custom']}

    def func2(input_string):
        # Tags supplied through the returned dict.
        start = input_string.find("lazy")
        if start < 0:
            return None
        return {'start': start, 'end': start + len("lazy"), 'tags': ['custom']}

    rebulk = Rebulk()
    rebulk.string("quick", name="str", tags=["first", "other"])
    rebulk.regex("f.x", tags="other")
    rebulk.functional(func, name="fn")
    rebulk.functional(func2, name="fn")

    matches = rebulk.matches("The quick brown fox jumps over the lazy dog")
    assert len(matches) == 4

    for name, count in (("str", 1), ("fn", 2), ("false", 0)):
        assert len(matches.named(name)) == count

    for tag, count in (("false", 0), ("first", 1), ("other", 2), ("custom", 2)):
        assert len(matches.tagged(tag)) == count
|
||||
|
||||
|
||||
def test_rebulk_rules_1():
    """``RemoveAllButLastYear`` is expected to leave only the last year match."""
    rebulk = Rebulk()
    rebulk.regex(r'\d{4}', name="year")
    rebulk.rules(rm.RemoveAllButLastYear)

    years = rebulk.matches("1984 keep only last 1968 entry 1982 case")

    assert len(years) == 1
    assert years[0].value == "1982"
|
||||
|
||||
|
||||
def test_rebulk_rules_2():
    """``PrefixedSuffixedYear`` is expected to keep two of the three year matches."""
    rebulk = Rebulk()

    rebulk.regex(r'\d{4}', name="year")
    # The two helper patterns are registered as private matches.
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYear)

    years = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    assert len(years) == 2
    assert years[0].value == "1984"
    assert years[1].value == "1968"
|
||||
|
||||
|
||||
def test_rebulk_rules_3():
    """Same scenario as ``test_rebulk_rules_2`` but driven by ``PrefixedSuffixedYearNoLambda``."""
    rebulk = Rebulk()

    rebulk.regex(r'\d{4}', name="year")
    # The two helper patterns are registered as private matches.
    rebulk.string(r'year', name="yearPrefix", private=True)
    rebulk.string(r'keep', name="yearSuffix", private=True)
    rebulk.rules(rm.PrefixedSuffixedYearNoLambda)

    years = rebulk.matches("Keep suffix 1984 keep prefixed year 1968 and remove the rest 1982")

    assert len(years) == 2
    assert years[0].value == "1984"
    assert years[1].value == "1968"
|
||||
|
||||
|
||||
def test_rebulk_rules_4():
    """A custom rule removes the "grabbed" match whenever another match precedes it."""

    class FirstOnlyRule(Rule):
        def when(self, matches, context):
            # Select the first "grabbed" match, but only if something comes before it.
            candidate = matches.named("grabbed", 0)
            if not candidate:
                return None
            if not matches.previous(candidate):
                return None
            return candidate

        def then(self, matches, when_response, context):
            matches.remove(when_response)

    rebulk = Rebulk()

    rebulk.regex("This match (.*?)grabbed", name="grabbed")
    rebulk.regex("if it's (.*?)first match", private=True)

    rebulk.rules(FirstOnlyRule)

    # "grabbed" comes first in the input: it is expected to be kept.
    kept = rebulk.matches("This match is grabbed only if it's the first match")
    assert len(kept) == 1
    assert kept[0].value == "This match is grabbed"

    # "grabbed" comes second in the input: nothing is expected to remain.
    dropped = rebulk.matches("if it's NOT the first match, This match is NOT grabbed")
    assert len(dropped) == 0
|
||||
|
||||
|
||||
class TestMarkers(object):
    """Tests for rules that inspect or remove marker matches."""

    def test_one_marker(self):
        """The word is expected to survive only when a "mark1" marker is found at it."""

        class MarkerRule(Rule):
            def when(self, matches, context):
                def is_mark1(candidate):
                    return candidate.name == "mark1"

                word_match = matches.named("word", 0)
                found_marker = matches.markers.at_match(word_match, is_mark1, 0)
                # No "mark1" marker at the word: hand the word over for removal.
                return None if found_marker else word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = Rebulk()
        rebulk.regex(r'\(.*?\)', marker=True, name="mark1")
        rebulk.regex(r'\[.*?\]', marker=True, name="mark2")
        rebulk.string("word", name="word")
        rebulk.rules(MarkerRule)

        in_parenthesis = rebulk.matches("grab (word) only if it's in parenthesis")

        assert len(in_parenthesis) == 1
        assert in_parenthesis[0].value == "word"

        in_bracket = rebulk.matches("don't grab [word] if it's in braket")
        assert len(in_bracket) == 0

        unmarked = rebulk.matches("don't grab word at all")
        assert len(unmarked) == 0

    def test_multiple_marker(self):
        """The word is expected to survive only when at least two markers are found at it."""

        class MarkerRule(Rule):
            def when(self, matches, context):
                def is_known_marker(candidate):
                    return candidate.name in ("mark1", "mark2")

                word_match = matches.named("word", 0)
                found_markers = matches.markers.at_match(word_match, is_known_marker)
                # Fewer than two markers at the word: hand the word over for removal.
                if len(found_markers) >= 2:
                    return None
                return word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = Rebulk()
        rebulk.regex(r'\(.*?\)', marker=True, name="mark1")
        rebulk.regex(r'\[.*?\]', marker=True, name="mark2")
        rebulk.regex("w.*?d", name="word")
        rebulk.rules(MarkerRule)

        nested = rebulk.matches("[grab (word) only] if it's in parenthesis and brakets")

        assert len(nested) == 1
        assert nested[0].value == "word"

        side_by_side = rebulk.matches("[don't grab](word)[if brakets are outside]")
        assert len(side_by_side) == 0

        overlapping = rebulk.matches("(grab w[or)d even] if it's partially in parenthesis and brakets")
        assert len(overlapping) == 1
        assert overlapping[0].value == "w[or)d"

    def test_at_index_marker(self):
        """The word is expected to survive only when a "mark1" marker is found at its start index."""

        class MarkerRule(Rule):
            def when(self, matches, context):
                def is_mark1(candidate):
                    return candidate.name == "mark1"

                word_match = matches.named("word", 0)
                found_marker = matches.markers.at_index(word_match.start, is_mark1, 0)
                # No "mark1" marker at the start index: hand the word over for removal.
                return None if found_marker else word_match

            def then(self, matches, when_response, context):
                matches.remove(when_response)

        rebulk = Rebulk()
        rebulk.regex(r'\(.*?\)', marker=True, name="mark1")
        rebulk.regex("w.*?d", name="word")
        rebulk.rules(MarkerRule)

        start_inside = rebulk.matches("gr(ab wo)rd only if starting of match is inside parenthesis")

        assert len(start_inside) == 1
        assert start_inside[0].value == "wo)rd"

        start_outside = rebulk.matches("don't grab wo(rd if starting of match is not inside parenthesis")

        assert len(start_outside) == 0

    def test_remove_marker(self):
        """A rule can remove a marker through ``matches.markers`` and leave regular matches alone."""

        class MarkerRule(Rule):
            def when(self, matches, context):
                first_marker = matches.markers.named("mark1", 0)
                return first_marker if first_marker else None

            def then(self, matches, when_response, context):
                matches.markers.remove(when_response)

        rebulk = Rebulk()
        rebulk.regex(r'\(.*?\)', marker=True, name="mark1")
        rebulk.regex("w.*?d", name="word")
        rebulk.rules(MarkerRule)

        found = rebulk.matches("grab word event (if it's not) inside parenthesis")

        assert len(found) == 1
        assert found[0].value == "word"

        # The only marker has been removed by the rule.
        assert not found.markers
|
||||
|
||||
|
||||
class TestUnicode(object):
    """Matching against non-ASCII input."""

    def test_rebulk_simple(self):
        """String, regex and functional patterns each match one character of a unicode input."""

        def locate(input_string):
            # Returns a (start, end) pair, or None when the character is absent.
            start = input_string.find(u"的")
            if start < 0:
                return None
            return start, start + len(u"的")

        rebulk = Rebulk()

        rebulk.string(u"敏")
        rebulk.regex(u"捷")
        rebulk.functional(locate)

        found = rebulk.matches(u"敏捷的棕色狐狸跳過懶狗")
        assert len(found) == 3

        assert found[0].value == u"敏"
        assert found[1].value == u"捷"
        assert found[2].value == u"的"
|
||||
|
||||
|
||||
class TestImmutable(object):
    """Removing items from a query result must not shrink the source ``matches``."""

    def test_starting(self):
        """Emptying every ``starting(i)`` result leaves the three matches in place."""
        sentence = "The quick brown fox jumps over the lazy dog"
        rebulk = Rebulk().string("quick").string("over").string("fox")
        matches = rebulk.matches(sentence)

        for position in range(0, len(sentence)):
            at_position = matches.starting(position)
            # Iterate over a snapshot because the result is emptied while looping.
            for item in list(at_position):
                at_position.remove(item)

        assert len(matches) == 3

    def test_ending(self):
        """Emptying every ``ending(i)`` result leaves the three matches in place."""
        sentence = "The quick brown fox jumps over the lazy dog"
        rebulk = Rebulk().string("quick").string("over").string("fox")
        matches = rebulk.matches(sentence)

        for position in range(0, len(sentence)):
            at_position = matches.ending(position)
            # Iterate over a snapshot because the result is emptied while looping.
            for item in list(at_position):
                at_position.remove(item)

        assert len(matches) == 3

    def test_named(self):
        """Emptying the ``named('test')`` result leaves the three matches in place."""
        sentence = "The quick brown fox jumps over the lazy dog"
        rebulk = Rebulk().defaults(name='test').string("quick").string("over").string("fox")
        matches = rebulk.matches(sentence)

        by_name = matches.named('test')
        # Iterate over a snapshot because the result is emptied while looping.
        for item in list(by_name):
            by_name.remove(item)

        assert len(by_name) == 0
        assert len(matches) == 3
|
197
libs/common/rebulk/test/test_rules.py
Normal file
197
libs/common/rebulk/test/test_rules.py
Normal file
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, no-member, len-as-condition
|
||||
import pytest
|
||||
from rebulk.test.default_rules_module import RuleRemove0, RuleAppend0, RuleRename0, RuleAppend1, RuleRemove1, \
|
||||
RuleRename1, RuleAppend2, RuleRename2, RuleAppend3, RuleRename3, RuleAppendTags0, RuleRemoveTags0, \
|
||||
RuleAppendTags1, RuleRemoveTags1
|
||||
|
||||
from ..rules import Rules
|
||||
from ..match import Matches, Match
|
||||
|
||||
from .rules_module import Rule1, Rule2, Rule3, Rule0, Rule1Disabled
|
||||
from . import rules_module as rm
|
||||
|
||||
|
||||
def test_rule_priority():
    """Run two rule pairs on the same starting match and check the resulting matches."""
    # Rules accepts both rule classes and rule instances.
    first_run = Matches([Match(1, 2)])
    Rules(Rule1, Rule2()).execute_all_rules(first_run, {})
    assert len(first_run) == 0

    second_run = Matches([Match(1, 2)])
    Rules(Rule1(), Rule0).execute_all_rules(second_run, {})
    assert len(second_run) == 1
    assert second_run[0] == Match(3, 4)
|
||||
|
||||
|
||||
def test_rules_duplicates():
    """Executing a rule set that holds the same rule twice raises ``ValueError``."""
    duplicated = Rules(Rule1, Rule1)
    matches = Matches([Match(1, 2)])

    with pytest.raises(ValueError):
        duplicated.execute_all_rules(matches, {})
|
||||
|
||||
|
||||
def test_rule_disabled():
    """With ``Rule1Disabled`` in place of ``Rule1``, both matches are expected at the end."""
    matches = Matches([Match(1, 2)])

    Rules(Rule1Disabled(), Rule2()).execute_all_rules(matches, {})

    assert len(matches) == 2
    assert matches[0] == Match(1, 2)
    assert matches[1] == Match(3, 4)
|
||||
|
||||
|
||||
def test_rule_when():
    """``Rule3`` is expected to add a match only when the context carries ``'when': True``."""
    rules = Rules(Rule3())

    # Context says False: the original match is left alone.
    untouched = Matches([Match(1, 2)])
    rules.execute_all_rules(untouched, {'when': False})
    assert len(untouched) == 1
    assert untouched[0] == Match(1, 2)

    # Context says True: a second match shows up. The same Rules object is reused.
    extended = Matches([Match(1, 2)])
    rules.execute_all_rules(extended, {'when': True})
    assert len(extended) == 2
    assert extended[0] == Match(1, 2)
    assert extended[1] == Match(3, 4)
|
||||
|
||||
|
||||
class TestDefaultRules(object):
    """Tests for the rules imported from ``default_rules_module``.

    Each test runs its rules one after the other, every time on a fresh
    ``Matches`` collection.
    """

    def test_remove(self):
        """Both remove rules are expected to empty a single-match collection."""
        for rule in (RuleRemove0, RuleRemove1):
            matches = Matches([Match(1, 2)])
            Rules(rule).execute_all_rules(matches, {})

            assert len(matches) == 0

    def test_append(self):
        """All append rules add one match; the last two are expected to name it 'renamed'."""
        cases = (
            (RuleAppend0, False),
            (RuleAppend1, False),
            (RuleAppend2, True),
            (RuleAppend3, True),
        )
        for rule, expects_renamed in cases:
            matches = Matches([Match(1, 2)])
            Rules(rule).execute_all_rules(matches, {})

            assert len(matches) == 2
            if expects_renamed:
                assert len(matches.named('renamed')) == 1

    def test_rename(self):
        """Check how many 'original' and 'renamed' matches remain after each rename rule."""
        # (rule, start, end, expected 'original' count, expected 'renamed' count)
        cases = (
            (RuleRename0, 1, 2, 1, 0),
            (RuleRename1, 5, 10, 0, 1),
            (RuleRename2, 5, 10, 0, 1),
            (RuleRename3, 5, 10, 0, 1),
        )
        for rule, start, end, original_count, renamed_count in cases:
            matches = Matches([Match(start, end, name='original')])
            Rules(rule).execute_all_rules(matches, {})

            assert len(matches.named('original')) == original_count
            assert len(matches.named('renamed')) == renamed_count

    def test_append_tags(self):
        """Both append-tags rules are expected to add 'new-tag' after the existing tag."""
        for rule in (RuleAppendTags0, RuleAppendTags1):
            matches = Matches([Match(1, 2, name='tags', tags=['other'])])
            Rules(rule).execute_all_rules(matches, {})

            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other', 'new-tag']

    def test_remove_tags(self):
        """Both remove-tags rules are expected to drop 'new-tag' and keep 'other'."""
        for rule in (RuleRemoveTags0, RuleRemoveTags1):
            matches = Matches([Match(1, 2, name='tags', tags=['other', 'new-tag'])])
            Rules(rule).execute_all_rules(matches, {})

            assert len(matches.named('tags')) == 1
            assert matches.named('tags', index=0).tags == ['other']
|
||||
|
||||
|
||||
def test_rule_module():
    """``Rules`` can be built from a whole module (``rules_module``) instead of rule classes."""
    matches = Matches([Match(1, 2)])

    Rules(rm).execute_all_rules(matches, {})

    assert len(matches) == 1
|
||||
|
||||
|
||||
def test_rule_repr():
    """``str()`` of a rule instance gives its bracketed name."""
    expectations = (
        (Rule0, "<Rule0>"),
        (Rule1, "<Rule1>"),
        (Rule2, "<Rule2>"),
        # The disabled variant is expected to print as a disabled Rule1.
        (Rule1Disabled, "<Disabled Rule1>"),
    )
    for rule_class, text in expectations:
        assert str(rule_class()) == text
|
111
libs/common/rebulk/test/test_toposort.py
Normal file
111
libs/common/rebulk/test/test_toposort.py
Normal file
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014 True Blade Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Original:
|
||||
# - https://bitbucket.org/ericvsmith/toposort (1.4)
|
||||
# Modifications:
|
||||
# - port to pytest
|
||||
# pylint: skip-file
|
||||
|
||||
import pytest
|
||||
from ..toposort import toposort, toposort_flatten, CyclicDependency
|
||||
|
||||
|
||||
class TestCase(object):
    """Tests for ``toposort`` on dependency mappings of the form ``{item: set(dependencies)}``."""

    def test_simple(self):
        """A small integer graph is sorted into three layers; self dependencies are ignored."""
        layers = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]

        graph = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3]),
        }
        assert list(toposort(graph)) == layers

        # make sure self dependencies are ignored
        graph_with_self_refs = {
            2: set([2, 11]),
            9: set([11, 8]),
            10: set([10, 11, 3]),
            11: set([7, 5]),
            8: set([7, 3]),
        }
        assert list(toposort(graph_with_self_refs)) == layers

        assert list(toposort({1: set()})) == [set([1])]
        assert list(toposort({1: set([1])})) == [set([1])]

    def test_no_dependencies(self):
        """Independent items all end up in the same layer."""
        chained = {1: set([2]), 3: set([4]), 5: set([6])}
        assert list(toposort(chained)) == [set([2, 4, 6]), set([1, 3, 5])]

        isolated = {1: set(), 3: set(), 5: set()}
        assert list(toposort(isolated)) == [set([1, 3, 5])]

    def test_empty(self):
        """An empty mapping yields nothing."""
        assert list(toposort({})) == []

    def test_strings(self):
        """Same graph as ``test_simple`` but with string items."""
        graph = {
            '2': set(['11']),
            '9': set(['11', '8']),
            '10': set(['11', '3']),
            '11': set(['7', '5']),
            '8': set(['7', '3']),
        }
        layers = [set(['3', '5', '7']), set(['8', '11']), set(['2', '9', '10'])]
        assert list(toposort(graph)) == layers

    def test_objects(self):
        """Same graph as ``test_simple`` but with plain ``object()`` instances as items."""
        o2 = object()
        o3 = object()
        o5 = object()
        o7 = object()
        o8 = object()
        o9 = object()
        o10 = object()
        o11 = object()
        graph = {
            o2: set([o11]),
            o9: set([o11, o8]),
            o10: set([o11, o3]),
            o11: set([o7, o5]),
            o8: set([o7, o3, o8]),
        }
        layers = [set([o3, o5, o7]), set([o8, o11]), set([o2, o9, o10])]
        assert list(toposort(graph)) == layers

    def test_cycle(self):
        """Cyclic graphs raise ``CyclicDependency`` once the generator is consumed."""
        # a simple, 2 element cycle
        direct_cycle = {1: set([2]), 2: set([1])}
        with pytest.raises(CyclicDependency):
            list(toposort(direct_cycle))

        # an indirect cycle
        indirect_cycle = {1: set([2]), 2: set([3]), 3: set([1])}
        with pytest.raises(CyclicDependency):
            list(toposort(indirect_cycle))

    def test_input_not_modified(self):
        """The mapping passed to ``toposort`` compares equal to its copy afterwards."""
        data = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3, 8]),  # includes something self-referential
        }
        # NOTE(review): dict.copy() is shallow, so ``orig`` shares its set objects
        # with ``data``; an in-place change to one of those sets would go unnoticed
        # by the comparison below.
        orig = data.copy()
        list(toposort(data))
        assert data == orig

    def test_input_not_modified_when_cycle_error(self):
        """The mapping also compares equal to its copy after a cycle error."""
        data = {
            1: set([2]),
            2: set([1]),
            3: set([4]),
        }
        # NOTE(review): shallow copy, same limitation as in test_input_not_modified.
        orig = data.copy()
        with pytest.raises(CyclicDependency):
            list(toposort(data))
        assert data == orig
|
||||
|
||||
|
||||
class TestCaseAll(object):
    """Tests for ``toposort_flatten``."""

    def test_sort_flatten(self):
        """The flattened output matches the layered output, with and without sorting."""
        data = {
            2: set([11]),
            9: set([11, 8]),
            10: set([11, 3]),
            11: set([7, 5]),
            8: set([7, 3, 8]),  # includes something self-referential
        }
        expected = [set([3, 5, 7]), set([8, 11]), set([2, 9, 10])]
        assert list(toposort(data)) == expected

        # now check the sorted results
        flattened = [item for layer in expected for item in sorted(layer)]
        assert toposort_flatten(data) == flattened

        # and the unsorted results. break the results up into groups to compare them
        actual = toposort_flatten(data, False)
        regrouped = [set(actual[0:3]), set(actual[3:5]), set(actual[5:8])]
        assert regrouped == expected
|
64
libs/common/rebulk/test/test_validators.py
Normal file
64
libs/common/rebulk/test/test_validators.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name,len-as-condition
|
||||
|
||||
from functools import partial
|
||||
|
||||
from rebulk.pattern import StringPattern
|
||||
|
||||
from ..validators import chars_before, chars_after, chars_surround, validators
|
||||
|
||||
# Characters handed to the validator factories below.
chars = ' _.'

# Each validator is the matching ``chars_*`` function with ``chars`` pre-bound
# as its first argument.
left, right, surrounding = (
    partial(check, chars) for check in (chars_before, chars_after, chars_surround)
)
|
||||
|
||||
|
||||
def test_left_chars():
    """Count "word" matches under the ``left`` validator for three inputs."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxx_wordxxx", 1),
        # Word at the very beginning of the input.
        ("wordxxx", 1),
    )
    for text, expected_count in cases:
        # A fresh pattern is built for every input, as in a one-shot call.
        found = list(StringPattern("word", validator=left).matches(text))
        assert len(found) == expected_count
|
||||
|
||||
|
||||
def test_right_chars():
    """Count "word" matches under the ``right`` validator for three inputs."""
    cases = (
        ("xxxwordxxx", 0),
        ("xxxword.xxx", 1),
        # Word at the very end of the input.
        ("xxxword", 1),
    )
    for text, expected_count in cases:
        # A fresh pattern is built for every input, as in a one-shot call.
        found = list(StringPattern("word", validator=right).matches(text))
        assert len(found) == expected_count
|
||||
|
||||
|
||||
def test_surrounding_chars():
    """Count "word" matches under the ``surrounding`` validator for four inputs."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        # The word is the whole input.
        ("word", 1),
    )
    for text, expected_count in cases:
        # A fresh pattern is built for every input, as in a one-shot call.
        found = list(StringPattern("word", validator=surrounding).matches(text))
        assert len(found) == expected_count
|
||||
|
||||
|
||||
def test_chain():
    """``validators(left, right)`` gives the same counts as the ``surrounding`` test inputs."""
    cases = (
        ("xxxword xxx", 0),
        ("xxx.wordxxx", 0),
        ("xxx word_xxx", 1),
        # The word is the whole input.
        ("word", 1),
    )
    for text, expected_count in cases:
        # Both the chained validator and the pattern are rebuilt for every input.
        chained = validators(left, right)
        found = list(StringPattern("word", validator=chained).matches(text))
        assert len(found) == expected_count
|
Loading…
Add table
Add a link
Reference in a new issue