Refactor encoding utils to utils.encoding

2025-07-12 16:22:53 -07:00 · 2019-01-05 17:25:36 -05:00 · 2019-01-05 17:25:36 -05:00 · 6a9ff96e8c
commit 6a9ff96e8c
parent 9d43e0d60b
2 changed files with 86 additions and 79 deletions
--- a/core/utils/__init__.py
+++ b/core/utils/__init__.py
@ -19,6 +19,7 @@ import core
 from core import extractor, logger
 from core.utils import shutil_custom
 from core.utils.download_info import get_download_info, update_download_info_status
 from core.utils.encoding import char_replace, convert_to_ascii
 from core.utils.links import copy_link, replace_links
 from core.utils.naming import clean_file_name, is_sample, sanitize_name
 from core.utils.network import find_download, test_connection, wake_on_lan, wake_up
@ -183,85 +184,6 @@ def flatten(output_destination):
    remove_empty_folders(output_destination)  # Cleanup empty directories
 def char_replace(name):
    """Guess the encoding of a byte-string name and transcode it to core.SYS_ENCODING.

    Detection heuristic (the first byte that matches decides):
      * UTF-8:       a 0xC2 or 0xC3 byte followed by a byte in 0xA0-0xFF
      * CP850:       a byte in 0x80-0xA5 (range not used by ISO-8859-15)
      * ISO-8859-15: a byte in 0xA6-0xFF

    :param name: the name to inspect; either text (``text_type``) or a byte string.
    :return: tuple ``(encoded, name)``. ``encoded`` is True only when a byte string
        was detected as an encoding other than ``core.SYS_ENCODING`` and was
        transcoded. Text input is returned encoded to ``core.SYS_ENCODING`` with
        ``encoded`` set to False.
    """
    encoded = False
    encoding = None
    if isinstance(name, text_type):
        return encoded, name.encode(core.SYS_ENCODING)
    # bytearray yields integer byte values on both Python 2 and Python 3.
    # Indexing a Python 3 bytes object gives ints, so comparing against
    # one-character str literals would never match (==) or would raise
    # TypeError (>=, <=).
    raw = bytearray(name)
    last = len(raw) - 1
    for idx, byte in enumerate(raw):
        # The UTF-8 check needs a following byte, so it is skipped on the last one.
        if idx < last and byte in (0xC2, 0xC3) and 0xA0 <= raw[idx + 1] <= 0xFF:
            encoding = 'utf-8'
            break
        if 0x80 <= byte <= 0xA5:
            encoding = 'cp850'
            break
        if 0xA6 <= byte <= 0xFF:
            encoding = 'iso-8859-15'
            break
    if encoding and not encoding == core.SYS_ENCODING:
        encoded = True
        name = name.decode(encoding).encode(core.SYS_ENCODING)
    return encoded, name
 def convert_to_ascii(input_name, dir_name):
    """Transcode a download's name and rename its directory tree to core.SYS_ENCODING.

    Does nothing when the ``ASCII/convert`` setting is 0 or when running on
    Windows (``os.name == 'nt'``). Otherwise ``input_name`` is passed through
    ``char_replace`` and the last component of ``dir_name``, every
    sub-directory and every file below it is renamed on disk whenever
    ``char_replace`` reports that it transcoded the name.

    :param input_name: name of the download.
    :param dir_name: path of the download directory (a trailing '/' is tolerated).
    :return: tuple ``(input_name, dir_name)`` reflecting any conversion/rename.
    """
    # just return if we don't want to convert or on windows os and '\' is replaced!.
    if int(core.CFG['ASCII']['convert']) == 0 or os.name == 'nt':
        return input_name, dir_name

    _, input_name = char_replace(input_name)

    parent, leaf = os.path.split(dir_name)
    if not leaf:  # ended with '/'
        parent, leaf = os.path.split(parent)

    changed, new_leaf = char_replace(leaf)
    if changed:
        dir_name = os.path.join(parent, new_leaf)
        logger.info('Renaming directory to: {0}.'.format(new_leaf), 'ENCODER')
        os.rename(os.path.join(parent, leaf), dir_name)
        # NOTE(review): presumably reports the new directory back to NZBGet - confirm.
        if 'NZBOP_SCRIPTDIR' in os.environ:
            print('[NZB] DIRECTORY={0}'.format(dir_name))

    # Bottom-up walk: a directory's children are visited before it is renamed.
    for root, subdirs, _files in os.walk(dir_name, topdown=False):
        for old_name in subdirs:
            changed, new_name = char_replace(old_name)
            if not changed:
                continue
            logger.info('Renaming directory to: {0}.'.format(new_name), 'ENCODER')
            os.rename(os.path.join(root, old_name), os.path.join(root, new_name))

    # Directories are settled now; a second pass renames the files.
    for root, _subdirs, files in os.walk(dir_name):
        for old_name in files:
            changed, new_name = char_replace(old_name)
            if not changed:
                continue
            logger.info('Renaming file to: {0}.'.format(new_name), 'ENCODER')
            os.rename(os.path.join(root, old_name), os.path.join(root, new_name))

    return input_name, dir_name
 def get_dirs(section, subsection, link='hard'):
    to_return = []
--- a/core/utils/encoding.py
+++ b/core/utils/encoding.py
@ -0,0 +1,85 @@
 import os
 from six import text_type
 import core
 from core import logger
 def char_replace(name):
    """Guess the encoding of a byte-string name and transcode it to core.SYS_ENCODING.

    Detection heuristic (the first byte that matches decides):
      * UTF-8:       a 0xC2 or 0xC3 byte followed by a byte in 0xA0-0xFF
      * CP850:       a byte in 0x80-0xA5 (range not used by ISO-8859-15)
      * ISO-8859-15: a byte in 0xA6-0xFF

    :param name: the name to inspect; either text (``text_type``) or a byte string.
    :return: tuple ``(encoded, name)``. ``encoded`` is True only when a byte string
        was detected as an encoding other than ``core.SYS_ENCODING`` and was
        transcoded. Text input is returned encoded to ``core.SYS_ENCODING`` with
        ``encoded`` set to False.
    """
    encoded = False
    encoding = None
    if isinstance(name, text_type):
        return encoded, name.encode(core.SYS_ENCODING)
    # bytearray yields integer byte values on both Python 2 and Python 3.
    # Indexing a Python 3 bytes object gives ints, so comparing against
    # one-character str literals would never match (==) or would raise
    # TypeError (>=, <=).
    raw = bytearray(name)
    last = len(raw) - 1
    for idx, byte in enumerate(raw):
        # The UTF-8 check needs a following byte, so it is skipped on the last one.
        if idx < last and byte in (0xC2, 0xC3) and 0xA0 <= raw[idx + 1] <= 0xFF:
            encoding = 'utf-8'
            break
        if 0x80 <= byte <= 0xA5:
            encoding = 'cp850'
            break
        if 0xA6 <= byte <= 0xFF:
            encoding = 'iso-8859-15'
            break
    if encoding and not encoding == core.SYS_ENCODING:
        encoded = True
        name = name.decode(encoding).encode(core.SYS_ENCODING)
    return encoded, name
 def convert_to_ascii(input_name, dir_name):
    """Transcode a download's name and rename its directory tree to core.SYS_ENCODING.

    Does nothing when the ``ASCII/convert`` setting is 0 or when running on
    Windows (``os.name == 'nt'``). Otherwise ``input_name`` is passed through
    ``char_replace`` and the last component of ``dir_name``, every
    sub-directory and every file below it is renamed on disk whenever
    ``char_replace`` reports that it transcoded the name.

    :param input_name: name of the download.
    :param dir_name: path of the download directory (a trailing '/' is tolerated).
    :return: tuple ``(input_name, dir_name)`` reflecting any conversion/rename.
    """
    # just return if we don't want to convert or on windows os and '\' is replaced!.
    if int(core.CFG['ASCII']['convert']) == 0 or os.name == 'nt':
        return input_name, dir_name

    _, input_name = char_replace(input_name)

    parent, leaf = os.path.split(dir_name)
    if not leaf:  # ended with '/'
        parent, leaf = os.path.split(parent)

    changed, new_leaf = char_replace(leaf)
    if changed:
        dir_name = os.path.join(parent, new_leaf)
        logger.info('Renaming directory to: {0}.'.format(new_leaf), 'ENCODER')
        os.rename(os.path.join(parent, leaf), dir_name)
        # NOTE(review): presumably reports the new directory back to NZBGet - confirm.
        if 'NZBOP_SCRIPTDIR' in os.environ:
            print('[NZB] DIRECTORY={0}'.format(dir_name))

    # Bottom-up walk: a directory's children are visited before it is renamed.
    for root, subdirs, _files in os.walk(dir_name, topdown=False):
        for old_name in subdirs:
            changed, new_name = char_replace(old_name)
            if not changed:
                continue
            logger.info('Renaming directory to: {0}.'.format(new_name), 'ENCODER')
            os.rename(os.path.join(root, old_name), os.path.join(root, new_name))

    # Directories are settled now; a second pass renames the files.
    for root, _subdirs, files in os.walk(dir_name):
        for old_name in files:
            changed, new_name = char_replace(old_name)
            if not changed:
                continue
            logger.info('Renaming file to: {0}.'.format(new_name), 'ENCODER')
            os.rename(os.path.join(root, old_name), os.path.join(root, new_name))

    return input_name, dir_name