mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-20 05:13:16 -07:00
fix encoding checks
This commit is contained in:
parent
71c435ba48
commit
f2c07f3c38
1 changed file with 58 additions and 30 deletions
|
@@ -27,41 +27,69 @@ def char_replace(name_in):
|
||||||
encoded = False
|
encoded = False
|
||||||
encoding = None
|
encoding = None
|
||||||
if isinstance(name_in, text_type):
|
if isinstance(name_in, text_type):
|
||||||
return encoded, str(name_in.encode(core.SYS_ENCODING))
|
return encoded, name_in
|
||||||
if PY2:
|
if PY2:
|
||||||
name = name_in
|
name = name_in
|
||||||
|
for Idx in range(len(name)):
|
||||||
|
# print('Trying to intuit the encoding')
|
||||||
|
# /!\ detection is done 2char by 2char for UTF-8 special character
|
||||||
|
if (len(name) != 1) & (Idx < (len(name) - 1)):
|
||||||
|
# Detect UTF-8
|
||||||
|
if ((name[Idx] == '\xC2') | (name[Idx] == '\xC3')) & (
|
||||||
|
(name[Idx + 1] >= '\xA0') & (name[Idx + 1] <= '\xFF')):
|
||||||
|
encoding = 'utf-8'
|
||||||
|
break
|
||||||
|
# Detect CP850
|
||||||
|
elif (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
|
||||||
|
encoding = 'cp850'
|
||||||
|
break
|
||||||
|
# Detect ISO-8859-15
|
||||||
|
elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
|
||||||
|
encoding = 'iso-8859-15'
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Detect CP850
|
||||||
|
if (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
|
||||||
|
encoding = 'cp850'
|
||||||
|
break
|
||||||
|
# Detect ISO-8859-15
|
||||||
|
elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
|
||||||
|
encoding = 'iso-8859-15'
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
name = bytes(name_in)
|
name = bytes(name_in)
|
||||||
for Idx in range(len(name)):
|
for Idx in range(len(name)):
|
||||||
# print('Trying to intuit the encoding')
|
# print('Trying to intuit the encoding')
|
||||||
# /!\ detection is done 2char by 2char for UTF-8 special character
|
# /!\ detection is done 2char by 2char for UTF-8 special character
|
||||||
if (len(name) != 1) & (Idx < (len(name) - 1)):
|
if (len(name) != 1) & (Idx < (len(name) - 1)):
|
||||||
# Detect UTF-8
|
# Detect UTF-8
|
||||||
if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & (
|
if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & (
|
||||||
(name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)):
|
(name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)):
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
break
|
break
|
||||||
# Detect CP850
|
# Detect CP850
|
||||||
elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
|
elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
|
||||||
encoding = 'cp850'
|
encoding = 'cp850'
|
||||||
break
|
break
|
||||||
# Detect ISO-8859-15
|
# Detect ISO-8859-15
|
||||||
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
|
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
|
||||||
encoding = 'iso-8859-15'
|
encoding = 'iso-8859-15'
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Detect CP850
|
# Detect CP850
|
||||||
if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
|
if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
|
||||||
encoding = 'cp850'
|
encoding = 'cp850'
|
||||||
break
|
break
|
||||||
# Detect ISO-8859-15
|
# Detect ISO-8859-15
|
||||||
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
|
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
|
||||||
encoding = 'iso-8859-15'
|
encoding = 'iso-8859-15'
|
||||||
break
|
break
|
||||||
if encoding and not encoding == core.SYS_ENCODING:
|
if encoding:
|
||||||
encoded = True
|
encoded = True
|
||||||
name = name.decode(encoding).encode(core.SYS_ENCODING)
|
name = name.decode(encoding)
|
||||||
return encoded, str(name)
|
elif not PY2:
|
||||||
|
name = name.decode()
|
||||||
|
return encoded, name
|
||||||
|
|
||||||
|
|
||||||
def convert_to_ascii(input_name, dir_name):
|
def convert_to_ascii(input_name, dir_name):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue