diff --git a/core/utils/encoding.py b/core/utils/encoding.py index b97083d8..e4ad894a 100644 --- a/core/utils/encoding.py +++ b/core/utils/encoding.py @@ -27,41 +27,69 @@ def char_replace(name_in): encoded = False encoding = None if isinstance(name_in, text_type): - return encoded, str(name_in.encode(core.SYS_ENCODING)) + return encoded, name_in if PY2: name = name_in + for Idx in range(len(name)): + # print('Trying to intuit the encoding') + # /!\ detection is done 2char by 2char for UTF-8 special character + if (len(name) != 1) & (Idx < (len(name) - 1)): + # Detect UTF-8 + if ((name[Idx] == '\xC2') | (name[Idx] == '\xC3')) & ( + (name[Idx + 1] >= '\xA0') & (name[Idx + 1] <= '\xFF')): + encoding = 'utf-8' + break + # Detect CP850 + elif (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'): + encoding = 'cp850' + break + # Detect ISO-8859-15 + elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'): + encoding = 'iso-8859-15' + break + else: + # Detect CP850 + if (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'): + encoding = 'cp850' + break + # Detect ISO-8859-15 + elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'): + encoding = 'iso-8859-15' + break else: name = bytes(name_in) - for Idx in range(len(name)): - # print('Trying to intuit the encoding') - # /!\ detection is done 2char by 2char for UTF-8 special character - if (len(name) != 1) & (Idx < (len(name) - 1)): - # Detect UTF-8 - if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & ( - (name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)): - encoding = 'utf-8' - break - # Detect CP850 - elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5): - encoding = 'cp850' - break - # Detect ISO-8859-15 - elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF): - encoding = 'iso-8859-15' - break - else: - # Detect CP850 - if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5): - encoding = 'cp850' - break - # Detect ISO-8859-15 - elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF): - encoding = 'iso-8859-15' - break - if encoding and not encoding == core.SYS_ENCODING: + for Idx in range(len(name)): + # print('Trying to intuit the encoding') + # /!\ detection is done 2char by 2char for UTF-8 special character + if (len(name) != 1) & (Idx < (len(name) - 1)): + # Detect UTF-8 + if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & ( + (name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)): + encoding = 'utf-8' + break + # Detect CP850 + elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5): + encoding = 'cp850' + break + # Detect ISO-8859-15 + elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF): + encoding = 'iso-8859-15' + break + else: + # Detect CP850 + if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5): + encoding = 'cp850' + break + # Detect ISO-8859-15 + elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF): + encoding = 'iso-8859-15' + break + if encoding: encoded = True - name = name.decode(encoding).encode(core.SYS_ENCODING) - return encoded, str(name) + name = name.decode(encoding) + elif not PY2: + name = name.decode() + return encoded, name def convert_to_ascii(input_name, dir_name):