mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 10:36:52 -07:00
Update vendored beets to 1.6.0
Updates colorama to 0.4.6 Adds confuse version 1.7.0 Updates jellyfish to 0.9.0 Adds mediafile 0.10.1 Updates munkres to 1.1.4 Updates musicbrainzngs to 0.7.1 Updates mutagen to 1.46.0 Updates pyyaml to 6.0 Updates unidecode to 1.3.6
This commit is contained in:
parent
5073ec0c6f
commit
56c6773c6b
385 changed files with 25143 additions and 18080 deletions
|
@ -1,18 +1,16 @@
|
|||
import unicodedata
|
||||
from collections import defaultdict
|
||||
from .compat import _range, _zip_longest, IS_PY3
|
||||
from itertools import zip_longest
|
||||
from .porter import Stemmer
|
||||
|
||||
|
||||
def _normalize(s):
|
||||
return unicodedata.normalize('NFKD', s)
|
||||
return unicodedata.normalize("NFKD", s)
|
||||
|
||||
|
||||
def _check_type(s):
|
||||
if IS_PY3 and not isinstance(s, str):
|
||||
raise TypeError('expected str or unicode, got %s' % type(s).__name__)
|
||||
elif not IS_PY3 and not isinstance(s, unicode):
|
||||
raise TypeError('expected unicode, got %s' % type(s).__name__)
|
||||
if not isinstance(s, str):
|
||||
raise TypeError("expected str or unicode, got %s" % type(s).__name__)
|
||||
|
||||
|
||||
def levenshtein_distance(s1, s2):
|
||||
|
@ -21,53 +19,54 @@ def levenshtein_distance(s1, s2):
|
|||
|
||||
if s1 == s2:
|
||||
return 0
|
||||
rows = len(s1)+1
|
||||
cols = len(s2)+1
|
||||
rows = len(s1) + 1
|
||||
cols = len(s2) + 1
|
||||
|
||||
if not s1:
|
||||
return cols-1
|
||||
return cols - 1
|
||||
if not s2:
|
||||
return rows-1
|
||||
return rows - 1
|
||||
|
||||
prev = None
|
||||
cur = range(cols)
|
||||
for r in _range(1, rows):
|
||||
prev, cur = cur, [r] + [0]*(cols-1)
|
||||
for c in _range(1, cols):
|
||||
for r in range(1, rows):
|
||||
prev, cur = cur, [r] + [0] * (cols - 1)
|
||||
for c in range(1, cols):
|
||||
deletion = prev[c] + 1
|
||||
insertion = cur[c-1] + 1
|
||||
edit = prev[c-1] + (0 if s1[r-1] == s2[c-1] else 1)
|
||||
insertion = cur[c - 1] + 1
|
||||
edit = prev[c - 1] + (0 if s1[r - 1] == s2[c - 1] else 1)
|
||||
cur[c] = min(edit, deletion, insertion)
|
||||
|
||||
return cur[-1]
|
||||
|
||||
|
||||
def _jaro_winkler(ying, yang, long_tolerance, winklerize):
|
||||
_check_type(ying)
|
||||
_check_type(yang)
|
||||
def _jaro_winkler(s1, s2, long_tolerance, winklerize):
|
||||
_check_type(s1)
|
||||
_check_type(s2)
|
||||
|
||||
ying_len = len(ying)
|
||||
yang_len = len(yang)
|
||||
s1_len = len(s1)
|
||||
s2_len = len(s2)
|
||||
|
||||
if not ying_len or not yang_len:
|
||||
if not s1_len or not s2_len:
|
||||
return 0.0
|
||||
|
||||
min_len = max(ying_len, yang_len)
|
||||
search_range = (min_len // 2) - 1
|
||||
min_len = min(s1_len, s2_len)
|
||||
search_range = max(s1_len, s2_len)
|
||||
search_range = (search_range // 2) - 1
|
||||
if search_range < 0:
|
||||
search_range = 0
|
||||
|
||||
ying_flags = [False]*ying_len
|
||||
yang_flags = [False]*yang_len
|
||||
s1_flags = [False] * s1_len
|
||||
s2_flags = [False] * s2_len
|
||||
|
||||
# looking only within search range, count & flag matched pairs
|
||||
common_chars = 0
|
||||
for i, ying_ch in enumerate(ying):
|
||||
low = i - search_range if i > search_range else 0
|
||||
hi = i + search_range if i + search_range < yang_len else yang_len - 1
|
||||
for j in _range(low, hi+1):
|
||||
if not yang_flags[j] and yang[j] == ying_ch:
|
||||
ying_flags[i] = yang_flags[j] = True
|
||||
for i, s1_ch in enumerate(s1):
|
||||
low = max(0, i - search_range)
|
||||
hi = min(i + search_range, s2_len - 1)
|
||||
for j in range(low, hi + 1):
|
||||
if not s2_flags[j] and s2[j] == s1_ch:
|
||||
s1_flags[i] = s2_flags[j] = True
|
||||
common_chars += 1
|
||||
break
|
||||
|
||||
|
@ -77,27 +76,32 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize):
|
|||
|
||||
# count transpositions
|
||||
k = trans_count = 0
|
||||
for i, ying_f in enumerate(ying_flags):
|
||||
if ying_f:
|
||||
for j in _range(k, yang_len):
|
||||
if yang_flags[j]:
|
||||
for i, s1_f in enumerate(s1_flags):
|
||||
if s1_f:
|
||||
for j in range(k, s2_len):
|
||||
if s2_flags[j]:
|
||||
k = j + 1
|
||||
break
|
||||
if ying[i] != yang[j]:
|
||||
if s1[i] != s2[j]:
|
||||
trans_count += 1
|
||||
trans_count /= 2
|
||||
trans_count //= 2
|
||||
|
||||
# adjust for similarities in nonmatched characters
|
||||
common_chars = float(common_chars)
|
||||
weight = ((common_chars/ying_len + common_chars/yang_len +
|
||||
(common_chars-trans_count) / common_chars)) / 3
|
||||
weight = (
|
||||
(
|
||||
common_chars / s1_len
|
||||
+ common_chars / s2_len
|
||||
+ (common_chars - trans_count) / common_chars
|
||||
)
|
||||
) / 3
|
||||
|
||||
# winkler modification: continue to boost if strings are similar
|
||||
if winklerize and weight > 0.7 and ying_len > 3 and yang_len > 3:
|
||||
if winklerize and weight > 0.7:
|
||||
# adjust for up to first 4 chars in common
|
||||
j = min(min_len, 4)
|
||||
i = 0
|
||||
while i < j and ying[i] == yang[i] and ying[i]:
|
||||
while i < j and s1[i] == s2[i]:
|
||||
i += 1
|
||||
if i:
|
||||
weight += i * 0.1 * (1.0 - weight)
|
||||
|
@ -105,13 +109,27 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize):
|
|||
# optionally adjust for long strings
|
||||
# after agreeing beginning chars, at least two or more must agree and
|
||||
# agreed characters must be > half of remaining characters
|
||||
if (long_tolerance and min_len > 4 and common_chars > i+1 and
|
||||
2 * common_chars >= min_len + i):
|
||||
weight += ((1.0 - weight) * (float(common_chars-i-1) / float(ying_len+yang_len-i*2+2)))
|
||||
if (
|
||||
long_tolerance
|
||||
and min_len > 4
|
||||
and common_chars > i + 1
|
||||
and 2 * common_chars >= min_len + i
|
||||
):
|
||||
weight += (1.0 - weight) * (
|
||||
float(common_chars - i - 1) / float(s1_len + s2_len - i * 2 + 2)
|
||||
)
|
||||
|
||||
return weight
|
||||
|
||||
|
||||
def jaro_similarity(s1, s2):
|
||||
return _jaro_winkler(s1, s2, False, False) # noqa
|
||||
|
||||
|
||||
def jaro_winkler_similarity(s1, s2, long_tolerance=False):
|
||||
return _jaro_winkler(s1, s2, long_tolerance, True) # noqa
|
||||
|
||||
|
||||
def damerau_levenshtein_distance(s1, s2):
|
||||
_check_type(s1)
|
||||
_check_type(s2)
|
||||
|
@ -124,41 +142,35 @@ def damerau_levenshtein_distance(s1, s2):
|
|||
da = defaultdict(int)
|
||||
|
||||
# distance matrix
|
||||
score = [[0]*(len2+2) for x in _range(len1+2)]
|
||||
score = [[0] * (len2 + 2) for x in range(len1 + 2)]
|
||||
|
||||
score[0][0] = infinite
|
||||
for i in _range(0, len1+1):
|
||||
score[i+1][0] = infinite
|
||||
score[i+1][1] = i
|
||||
for i in _range(0, len2+1):
|
||||
score[0][i+1] = infinite
|
||||
score[1][i+1] = i
|
||||
for i in range(0, len1 + 1):
|
||||
score[i + 1][0] = infinite
|
||||
score[i + 1][1] = i
|
||||
for i in range(0, len2 + 1):
|
||||
score[0][i + 1] = infinite
|
||||
score[1][i + 1] = i
|
||||
|
||||
for i in _range(1, len1+1):
|
||||
for i in range(1, len1 + 1):
|
||||
db = 0
|
||||
for j in _range(1, len2+1):
|
||||
i1 = da[s2[j-1]]
|
||||
for j in range(1, len2 + 1):
|
||||
i1 = da[s2[j - 1]]
|
||||
j1 = db
|
||||
cost = 1
|
||||
if s1[i-1] == s2[j-1]:
|
||||
if s1[i - 1] == s2[j - 1]:
|
||||
cost = 0
|
||||
db = j
|
||||
|
||||
score[i+1][j+1] = min(score[i][j] + cost,
|
||||
score[i+1][j] + 1,
|
||||
score[i][j+1] + 1,
|
||||
score[i1][j1] + (i-i1-1) + 1 + (j-j1-1))
|
||||
da[s1[i-1]] = i
|
||||
score[i + 1][j + 1] = min(
|
||||
score[i][j] + cost,
|
||||
score[i + 1][j] + 1,
|
||||
score[i][j + 1] + 1,
|
||||
score[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1),
|
||||
)
|
||||
da[s1[i - 1]] = i
|
||||
|
||||
return score[len1+1][len2+1]
|
||||
|
||||
|
||||
def jaro_distance(s1, s2):
|
||||
return _jaro_winkler(s1, s2, False, False)
|
||||
|
||||
|
||||
def jaro_winkler(s1, s2, long_tolerance=False):
|
||||
return _jaro_winkler(s1, s2, long_tolerance, True)
|
||||
return score[len1 + 1][len2 + 1]
|
||||
|
||||
|
||||
def soundex(s):
|
||||
|
@ -166,21 +178,23 @@ def soundex(s):
|
|||
_check_type(s)
|
||||
|
||||
if not s:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
s = _normalize(s)
|
||||
s = s.upper()
|
||||
|
||||
replacements = (('BFPV', '1'),
|
||||
('CGJKQSXZ', '2'),
|
||||
('DT', '3'),
|
||||
('L', '4'),
|
||||
('MN', '5'),
|
||||
('R', '6'))
|
||||
replacements = (
|
||||
("BFPV", "1"),
|
||||
("CGJKQSXZ", "2"),
|
||||
("DT", "3"),
|
||||
("L", "4"),
|
||||
("MN", "5"),
|
||||
("R", "6"),
|
||||
)
|
||||
result = [s[0]]
|
||||
count = 1
|
||||
|
||||
# find would-be replacment for first character
|
||||
# find would-be replacement for first character
|
||||
for lset, sub in replacements:
|
||||
if s[0] in lset:
|
||||
last = sub
|
||||
|
@ -197,12 +211,14 @@ def soundex(s):
|
|||
last = sub
|
||||
break
|
||||
else:
|
||||
last = None
|
||||
if letter != "H" and letter != "W":
|
||||
# leave last alone if middle letter is H or W
|
||||
last = None
|
||||
if count == 4:
|
||||
break
|
||||
|
||||
result += '0'*(4-count)
|
||||
return ''.join(result)
|
||||
result += "0" * (4 - count)
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def hamming_distance(s1, s2):
|
||||
|
@ -227,28 +243,28 @@ def nysiis(s):
|
|||
_check_type(s)
|
||||
|
||||
if not s:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
s = s.upper()
|
||||
key = []
|
||||
|
||||
# step 1 - prefixes
|
||||
if s.startswith('MAC'):
|
||||
s = 'MCC' + s[3:]
|
||||
elif s.startswith('KN'):
|
||||
if s.startswith("MAC"):
|
||||
s = "MCC" + s[3:]
|
||||
elif s.startswith("KN"):
|
||||
s = s[1:]
|
||||
elif s.startswith('K'):
|
||||
s = 'C' + s[1:]
|
||||
elif s.startswith(('PH', 'PF')):
|
||||
s = 'FF' + s[2:]
|
||||
elif s.startswith('SCH'):
|
||||
s = 'SSS' + s[3:]
|
||||
elif s.startswith("K"):
|
||||
s = "C" + s[1:]
|
||||
elif s.startswith(("PH", "PF")):
|
||||
s = "FF" + s[2:]
|
||||
elif s.startswith("SCH"):
|
||||
s = "SSS" + s[3:]
|
||||
|
||||
# step 2 - suffixes
|
||||
if s.endswith(('IE', 'EE')):
|
||||
s = s[:-2] + 'Y'
|
||||
elif s.endswith(('DT', 'RT', 'RD', 'NT', 'ND')):
|
||||
s = s[:-2] + 'D'
|
||||
if s.endswith(("IE", "EE")):
|
||||
s = s[:-2] + "Y"
|
||||
elif s.endswith(("DT", "RT", "RD", "NT", "ND")):
|
||||
s = s[:-2] + "D"
|
||||
|
||||
# step 3 - first character of key comes from name
|
||||
key.append(s[0])
|
||||
|
@ -258,53 +274,57 @@ def nysiis(s):
|
|||
len_s = len(s)
|
||||
while i < len_s:
|
||||
ch = s[i]
|
||||
if ch == 'E' and i+1 < len_s and s[i+1] == 'V':
|
||||
ch = 'AF'
|
||||
if ch == "E" and i + 1 < len_s and s[i + 1] == "V":
|
||||
ch = "AF"
|
||||
i += 1
|
||||
elif ch in 'AEIOU':
|
||||
ch = 'A'
|
||||
elif ch == 'Q':
|
||||
ch = 'G'
|
||||
elif ch == 'Z':
|
||||
ch = 'S'
|
||||
elif ch == 'M':
|
||||
ch = 'N'
|
||||
elif ch == 'K':
|
||||
if i+1 < len(s) and s[i+1] == 'N':
|
||||
ch = 'N'
|
||||
elif ch in "AEIOU":
|
||||
ch = "A"
|
||||
elif ch == "Q":
|
||||
ch = "G"
|
||||
elif ch == "Z":
|
||||
ch = "S"
|
||||
elif ch == "M":
|
||||
ch = "N"
|
||||
elif ch == "K":
|
||||
if i + 1 < len(s) and s[i + 1] == "N":
|
||||
ch = "N"
|
||||
else:
|
||||
ch = 'C'
|
||||
elif ch == 'S' and s[i+1:i+3] == 'CH':
|
||||
ch = 'SS'
|
||||
ch = "C"
|
||||
elif ch == "S" and s[i + 1 : i + 3] == "CH":
|
||||
ch = "SS"
|
||||
i += 2
|
||||
elif ch == 'P' and i+1 < len(s) and s[i+1] == 'H':
|
||||
ch = 'F'
|
||||
elif ch == "P" and i + 1 < len(s) and s[i + 1] == "H":
|
||||
ch = "F"
|
||||
i += 1
|
||||
elif ch == 'H' and (s[i-1] not in 'AEIOU' or (i+1 < len(s) and s[i+1] not in 'AEIOU')):
|
||||
if s[i-1] in 'AEIOU':
|
||||
ch = 'A'
|
||||
elif ch == "H" and (
|
||||
s[i - 1] not in "AEIOU"
|
||||
or (i + 1 < len(s) and s[i + 1] not in "AEIOU")
|
||||
or (i + 1 == len(s))
|
||||
):
|
||||
if s[i - 1] in "AEIOU":
|
||||
ch = "A"
|
||||
else:
|
||||
ch = s[i-1]
|
||||
elif ch == 'W' and s[i-1] in 'AEIOU':
|
||||
ch = s[i-1]
|
||||
ch = s[i - 1]
|
||||
elif ch == "W" and s[i - 1] in "AEIOU":
|
||||
ch = s[i - 1]
|
||||
|
||||
if ch[-1] != key[-1][-1]:
|
||||
key.append(ch)
|
||||
|
||||
i += 1
|
||||
|
||||
key = ''.join(key)
|
||||
key = "".join(key)
|
||||
|
||||
# step 5 - remove trailing S
|
||||
if key.endswith('S') and key != 'S':
|
||||
if key.endswith("S") and key != "S":
|
||||
key = key[:-1]
|
||||
|
||||
# step 6 - replace AY w/ Y
|
||||
if key.endswith('AY'):
|
||||
key = key[:-2] + 'Y'
|
||||
if key.endswith("AY"):
|
||||
key = key[:-2] + "Y"
|
||||
|
||||
# step 7 - remove trailing A
|
||||
if key.endswith('A') and key != 'A':
|
||||
if key.endswith("A") and key != "A":
|
||||
key = key[:-1]
|
||||
|
||||
# step 8 was already done
|
||||
|
@ -315,24 +335,26 @@ def nysiis(s):
|
|||
def match_rating_codex(s):
|
||||
_check_type(s)
|
||||
|
||||
s = s.upper()
|
||||
# we ignore spaces
|
||||
s = s.upper().replace(" ", "")
|
||||
codex = []
|
||||
|
||||
prev = None
|
||||
for i, c in enumerate(s):
|
||||
# not a space OR
|
||||
# starting character & vowel
|
||||
first = True
|
||||
for c in s:
|
||||
# starting character
|
||||
# or consonant not preceded by same consonant
|
||||
if (c != ' ' and (i == 0 and c in 'AEIOU') or (c not in 'AEIOU' and c != prev)):
|
||||
if first or (c not in "AEIOU" and c != prev):
|
||||
codex.append(c)
|
||||
|
||||
prev = c
|
||||
first = False
|
||||
|
||||
# just use first/last 3
|
||||
if len(codex) > 6:
|
||||
return ''.join(codex[:3]+codex[-3:])
|
||||
return "".join(codex[:3] + codex[-3:])
|
||||
else:
|
||||
return ''.join(codex)
|
||||
return "".join(codex)
|
||||
|
||||
|
||||
def match_rating_comparison(s1, s2):
|
||||
|
@ -344,7 +366,7 @@ def match_rating_comparison(s1, s2):
|
|||
res2 = []
|
||||
|
||||
# length differs by 3 or more, no result
|
||||
if abs(len1-len2) >= 3:
|
||||
if abs(len1 - len2) >= 3:
|
||||
return None
|
||||
|
||||
# get minimum rating based on sums of codexes
|
||||
|
@ -359,7 +381,7 @@ def match_rating_comparison(s1, s2):
|
|||
min_rating = 2
|
||||
|
||||
# strip off common prefixes
|
||||
for c1, c2 in _zip_longest(codex1, codex2):
|
||||
for c1, c2 in zip_longest(codex1, codex2):
|
||||
if c1 != c2:
|
||||
if c1:
|
||||
res1.append(c1)
|
||||
|
@ -367,7 +389,7 @@ def match_rating_comparison(s1, s2):
|
|||
res2.append(c2)
|
||||
|
||||
unmatched_count1 = unmatched_count2 = 0
|
||||
for c1, c2 in _zip_longest(reversed(res1), reversed(res2)):
|
||||
for c1, c2 in zip_longest(reversed(res1), reversed(res2)):
|
||||
if c1 != c2:
|
||||
if c1:
|
||||
unmatched_count1 += 1
|
||||
|
@ -385,112 +407,113 @@ def metaphone(s):
|
|||
s = _normalize(s.lower())
|
||||
|
||||
# skip first character if s starts with these
|
||||
if s.startswith(('kn', 'gn', 'pn', 'ac', 'wr', 'ae')):
|
||||
if s.startswith(("kn", "gn", "pn", "wr", "ae")):
|
||||
s = s[1:]
|
||||
|
||||
i = 0
|
||||
|
||||
while i < len(s):
|
||||
c = s[i]
|
||||
next = s[i+1] if i < len(s)-1 else '*****'
|
||||
nextnext = s[i+2] if i < len(s)-2 else '*****'
|
||||
next = s[i + 1] if i < len(s) - 1 else "*****"
|
||||
nextnext = s[i + 2] if i < len(s) - 2 else "*****"
|
||||
|
||||
# skip doubles except for cc
|
||||
if c == next and c != 'c':
|
||||
if c == next and c != "c":
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if c in 'aeiou':
|
||||
if i == 0 or s[i-1] == ' ':
|
||||
if c in "aeiou":
|
||||
if i == 0 or s[i - 1] == " ":
|
||||
result.append(c)
|
||||
elif c == 'b':
|
||||
if (not (i != 0 and s[i-1] == 'm')) or next:
|
||||
result.append('b')
|
||||
elif c == 'c':
|
||||
if next == 'i' and nextnext == 'a' or next == 'h':
|
||||
result.append('x')
|
||||
elif c == "b":
|
||||
if (not (i != 0 and s[i - 1] == "m")) or next:
|
||||
result.append("b")
|
||||
elif c == "c":
|
||||
if next == "i" and nextnext == "a" or next == "h":
|
||||
result.append("x")
|
||||
i += 1
|
||||
elif next in 'iey':
|
||||
result.append('s')
|
||||
elif next in "iey":
|
||||
result.append("s")
|
||||
i += 1
|
||||
else:
|
||||
result.append('k')
|
||||
elif c == 'd':
|
||||
if next == 'g' and nextnext in 'iey':
|
||||
result.append('j')
|
||||
result.append("k")
|
||||
elif c == "d":
|
||||
if next == "g" and nextnext in "iey":
|
||||
result.append("j")
|
||||
i += 2
|
||||
else:
|
||||
result.append('t')
|
||||
elif c in 'fjlmnr':
|
||||
result.append("t")
|
||||
elif c in "fjlmnr":
|
||||
result.append(c)
|
||||
elif c == 'g':
|
||||
if next in 'iey':
|
||||
result.append('j')
|
||||
elif next not in 'hn':
|
||||
result.append('k')
|
||||
elif next == 'h' and nextnext and nextnext not in 'aeiou':
|
||||
elif c == "g":
|
||||
if next in "iey":
|
||||
result.append("j")
|
||||
elif next == "h" and nextnext and nextnext not in "aeiou":
|
||||
i += 1
|
||||
elif c == 'h':
|
||||
if i == 0 or next in 'aeiou' or s[i-1] not in 'aeiou':
|
||||
result.append('h')
|
||||
elif c == 'k':
|
||||
if i == 0 or s[i-1] != 'c':
|
||||
result.append('k')
|
||||
elif c == 'p':
|
||||
if next == 'h':
|
||||
result.append('f')
|
||||
elif next == "n" and not nextnext:
|
||||
i += 1
|
||||
else:
|
||||
result.append('p')
|
||||
elif c == 'q':
|
||||
result.append('k')
|
||||
elif c == 's':
|
||||
if next == 'h':
|
||||
result.append('x')
|
||||
result.append("k")
|
||||
elif c == "h":
|
||||
if i == 0 or next in "aeiou" or s[i - 1] not in "aeiou":
|
||||
result.append("h")
|
||||
elif c == "k":
|
||||
if i == 0 or s[i - 1] != "c":
|
||||
result.append("k")
|
||||
elif c == "p":
|
||||
if next == "h":
|
||||
result.append("f")
|
||||
i += 1
|
||||
elif next == 'i' and nextnext in 'oa':
|
||||
result.append('x')
|
||||
else:
|
||||
result.append("p")
|
||||
elif c == "q":
|
||||
result.append("k")
|
||||
elif c == "s":
|
||||
if next == "h":
|
||||
result.append("x")
|
||||
i += 1
|
||||
elif next == "i" and nextnext in "oa":
|
||||
result.append("x")
|
||||
i += 2
|
||||
else:
|
||||
result.append('s')
|
||||
elif c == 't':
|
||||
if next == 'i' and nextnext in 'oa':
|
||||
result.append('x')
|
||||
elif next == 'h':
|
||||
result.append('0')
|
||||
result.append("s")
|
||||
elif c == "t":
|
||||
if next == "i" and nextnext in "oa":
|
||||
result.append("x")
|
||||
elif next == "h":
|
||||
result.append("0")
|
||||
i += 1
|
||||
elif next != 'c' or nextnext != 'h':
|
||||
result.append('t')
|
||||
elif c == 'v':
|
||||
result.append('f')
|
||||
elif c == 'w':
|
||||
if i == 0 and next == 'h':
|
||||
elif next != "c" or nextnext != "h":
|
||||
result.append("t")
|
||||
elif c == "v":
|
||||
result.append("f")
|
||||
elif c == "w":
|
||||
if i == 0 and next == "h":
|
||||
i += 1
|
||||
if nextnext in 'aeiou' or nextnext == '*****':
|
||||
result.append('w')
|
||||
elif next in 'aeiou' or next == '*****':
|
||||
result.append('w')
|
||||
elif c == 'x':
|
||||
result.append("w")
|
||||
elif next in "aeiou":
|
||||
result.append("w")
|
||||
elif c == "x":
|
||||
if i == 0:
|
||||
if next == 'h' or (next == 'i' and nextnext in 'oa'):
|
||||
result.append('x')
|
||||
if next == "h" or (next == "i" and nextnext in "oa"):
|
||||
result.append("x")
|
||||
else:
|
||||
result.append('s')
|
||||
result.append("s")
|
||||
else:
|
||||
result.append('k')
|
||||
result.append('s')
|
||||
elif c == 'y':
|
||||
if next in 'aeiou':
|
||||
result.append('y')
|
||||
elif c == 'z':
|
||||
result.append('s')
|
||||
elif c == ' ':
|
||||
if len(result) > 0 and result[-1] != ' ':
|
||||
result.append(' ')
|
||||
result.append("k")
|
||||
result.append("s")
|
||||
elif c == "y":
|
||||
if next in "aeiou":
|
||||
result.append("y")
|
||||
elif c == "z":
|
||||
result.append("s")
|
||||
elif c == " ":
|
||||
if len(result) > 0 and result[-1] != " ":
|
||||
result.append(" ")
|
||||
|
||||
i += 1
|
||||
|
||||
return ''.join(result).upper()
|
||||
return "".join(result).upper()
|
||||
|
||||
|
||||
def porter_stem(s):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue