mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-20 13:23:18 -07:00
Move common libs to libs/common
This commit is contained in:
parent
8dbb1a2451
commit
1f4bd41bcc
1612 changed files with 962 additions and 10 deletions
90
libs/common/guessit/rules/properties/crc.py
Normal file
90
libs/common/guessit/rules/properties/crc.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
crc and uuid properties
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def crc(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object holding the ``crc32`` and ``uuid`` patterns.

    Two patterns are registered:

    * a regex pattern named ``crc32`` matching eight hexadecimal characters;
    * a functional pattern named ``uuid`` backed by :func:`guess_idnumber`.

    The whole Rebulk object is disabled whenever ``is_disabled`` reports
    ``'crc32'`` as disabled for the current context.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    numbering_names = ['episode', 'season']

    def _crc32_disabled(context):
        return is_disabled(context, 'crc32')

    def _crc32_conflict_solver(match, other):  # pylint:disable=unused-argument
        # Against an episode/season match the solver answers with ``other``;
        # every other conflict falls back to the default behaviour.
        if other.name in numbering_names:
            return other
        return '__default__'

    def _uuid_conflict_solver(match, other):
        # Mirror image of the crc32 solver: answers with ``match`` itself
        # when the competing match is an episode/season.
        if other.name in numbering_names:
            return match
        return '__default__'

    builder = Rebulk(disabled=_crc32_disabled)
    builder = builder.regex_defaults(flags=re.IGNORECASE)
    builder.defaults(validator=seps_surround)

    builder.regex('(?:[a-fA-F]|[0-9]){8}',
                  name='crc32',
                  conflict_solver=_crc32_conflict_solver)

    builder.functional(guess_idnumber,
                       name='uuid',
                       conflict_solver=_uuid_conflict_solver)

    return builder
|
||||
|
||||
|
||||
_DIGIT = 0
|
||||
_LETTER = 1
|
||||
_OTHER = 2
|
||||
|
||||
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
|
||||
|
||||
|
||||
def guess_idnumber(string):
|
||||
"""
|
||||
Guess id number function
|
||||
:param string:
|
||||
:type string:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
# pylint:disable=invalid-name
|
||||
ret = []
|
||||
|
||||
matches = list(_idnum.finditer(string))
|
||||
for match in matches:
|
||||
result = match.groupdict()
|
||||
switch_count = 0
|
||||
switch_letter_count = 0
|
||||
letter_count = 0
|
||||
last_letter = None
|
||||
|
||||
last = _LETTER
|
||||
for c in result['uuid']:
|
||||
if c in '0123456789':
|
||||
ci = _DIGIT
|
||||
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
||||
ci = _LETTER
|
||||
if c != last_letter:
|
||||
switch_letter_count += 1
|
||||
last_letter = c
|
||||
letter_count += 1
|
||||
else:
|
||||
ci = _OTHER
|
||||
|
||||
if ci != last:
|
||||
switch_count += 1
|
||||
|
||||
last = ci
|
||||
|
||||
# only return the result as probable if we alternate often between
|
||||
# char type (more likely for hash values than for common words)
|
||||
switch_ratio = float(switch_count) / len(result['uuid'])
|
||||
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
|
||||
|
||||
if switch_ratio > 0.4 and letters_ratio > 0.4:
|
||||
ret.append(match.span())
|
||||
|
||||
return ret
|
Loading…
Add table
Add a link
Reference in a new issue