fix encoding checks

This commit is contained in:
clinton-hall 2020-01-05 13:39:23 +13:00
commit f2c07f3c38

View file

def char_replace(name_in):
    """Guess the encoding of a byte-string name and decode it to text.

    The name is scanned for its first byte >= 0x80, and the encoding is
    chosen from that byte alone:

    * ``utf-8``       -- 0xC2 or 0xC3 followed by a byte in 0xA0-0xFF
    * ``cp850``       -- a byte in 0x80-0xA5
    * ``iso-8859-15`` -- a byte in 0xA6-0xFF

    Args:
        name_in: Text, or a byte string whose encoding is unknown.

    Returns:
        A tuple ``(encoded, name)``. ``encoded`` is True only when a byte
        >= 0x80 was found and ``name`` was decoded with the guessed
        encoding. Text input is returned unchanged with ``encoded`` False.
        Byte input without any byte >= 0x80 is decoded with the default
        codec on Python 3 and returned as-is on Python 2.

    Raises:
        UnicodeDecodeError: Propagated from ``decode`` when the bytes are
            not valid in the guessed encoding (only the first special
            byte is inspected, so a later byte can still be invalid).
    """
    encoded = False
    encoding = None
    if isinstance(name_in, text_type):
        # Already text: there is nothing to detect or decode.
        return encoded, name_in

    name = name_in if PY2 else bytes(name_in)

    # bytearray yields integer byte values on both Python 2 and Python 3,
    # so a single detection loop serves both interpreters.
    raw = bytearray(name)
    last = len(raw) - 1
    for idx, byte in enumerate(raw):
        if byte < 0x80:
            # Plain ASCII carries no encoding information.
            continue
        # The UTF-8 test looks at a 2-byte pair, so it needs a next byte.
        if byte in (0xC2, 0xC3) and idx < last and raw[idx + 1] >= 0xA0:
            encoding = 'utf-8'
        elif byte <= 0xA5:
            encoding = 'cp850'
        else:
            encoding = 'iso-8859-15'
        # The first special byte decides the encoding.
        break

    if encoding:
        encoded = True
        name = name.decode(encoding)
    elif not PY2:
        # No special byte found: still hand back text on Python 3.
        name = name.decode()
    return encoded, name
def convert_to_ascii(input_name, dir_name):