From 807518137c3325e8236bef5dedc45c9d480e14ec Mon Sep 17 00:00:00 2001
From: Jonathan Springer
Date: Sat, 4 Jan 2020 03:15:11 -0500
Subject: [PATCH] Update encoding to use bytes for strings. (#1690)

char_replace() now converts its non-text input to bytes and compares each
indexed element against integer byte values instead of one-character
string literals.

The re-encoded result is still assigned back to `name`, so the value
returned together with `encoded = True` is the converted name rather than
the untouched input. No per-byte debug output is emitted from the
detection loop.

---
 core/utils/encoding.py | 21 ++++++++++++---------
 nzbToMedia.py          |  2 +-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/core/utils/encoding.py b/core/utils/encoding.py
index bcc4994f..e36ed094 100644
--- a/core/utils/encoding.py
+++ b/core/utils/encoding.py
@@ -5,6 +5,8 @@ from __future__ import (
     unicode_literals,
 )
 
+from builtins import bytes
+
 import os
 
 from six import text_type
@@ -13,7 +15,7 @@ import core
 from core import logger
 
 
-def char_replace(name):
+def char_replace(name_in):
     # Special character hex range:
     # CP850: 0x80-0xA5 (fortunately not used in ISO-8859-15)
     # UTF-8: 1st hex code 0xC2-0xC3 followed by a 2nd hex code 0xA1-0xFF
@@ -22,36 +24,37 @@ def char_replace(name):
     # If there is special character, detects if it is a UTF-8, CP850 or ISO-8859-15 encoding
     encoded = False
     encoding = None
-    if isinstance(name, text_type):
-        return encoded, name.encode(core.SYS_ENCODING)
+    if isinstance(name_in, text_type):
+        return encoded, name_in.encode(core.SYS_ENCODING)
+    name = bytes(name_in)
     for Idx in range(len(name)):
         # /!\ detection is done 2char by 2char for UTF-8 special character
         if (len(name) != 1) & (Idx < (len(name) - 1)):
             # Detect UTF-8
-            if ((name[Idx] == '\xC2') | (name[Idx] == '\xC3')) & (
-                    (name[Idx + 1] >= '\xA0') & (name[Idx + 1] <= '\xFF')):
+            if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & (
+                    (name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)):
                 encoding = 'utf-8'
                 break
             # Detect CP850
-            elif (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
+            elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
                 encoding = 'cp850'
                 break
             # Detect ISO-8859-15
-            elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
+            elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
                 encoding = 'iso-8859-15'
                 break
         else:
             # Detect CP850
-            if (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
+            if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
                 encoding = 'cp850'
                 break
             # Detect ISO-8859-15
-            elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
+            elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
                 encoding = 'iso-8859-15'
                 break
     if encoding and not encoding == core.SYS_ENCODING:
         encoded = True
         name = name.decode(encoding).encode(core.SYS_ENCODING)
     return encoded, name
 
 
diff --git a/nzbToMedia.py b/nzbToMedia.py
index 073cb6f1..6ebfd116 100755
--- a/nzbToMedia.py
+++ b/nzbToMedia.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # coding=utf-8
 #
 ##############################################################################