diff --git a/core/utils/__init__.py b/core/utils/__init__.py
index 46a30431..d0bc778a 100644
--- a/core/utils/__init__.py
+++ b/core/utils/__init__.py
@@ -19,6 +19,7 @@ import core
 from core import extractor, logger
 from core.utils import shutil_custom
 from core.utils.download_info import get_download_info, update_download_info_status
+from core.utils.encoding import char_replace, convert_to_ascii
 from core.utils.links import copy_link, replace_links
 from core.utils.naming import clean_file_name, is_sample, sanitize_name
 from core.utils.network import find_download, test_connection, wake_on_lan, wake_up
@@ -183,85 +184,6 @@ def flatten(output_destination):
     remove_empty_folders(output_destination)  # Cleanup empty directories
 
 
-def char_replace(name):
-    # Special character hex range:
-    # CP850: 0x80-0xA5 (fortunately not used in ISO-8859-15)
-    # UTF-8: 1st hex code 0xC2-0xC3 followed by a 2nd hex code 0xA1-0xFF
-    # ISO-8859-15: 0xA6-0xFF
-    # The function will detect if Name contains a special character
-    # If there is special character, detects if it is a UTF-8, CP850 or ISO-8859-15 encoding
-    encoded = False
-    encoding = None
-    if isinstance(name, text_type):
-        return encoded, name.encode(core.SYS_ENCODING)
-    for Idx in range(len(name)):
-        # /!\ detection is done 2char by 2char for UTF-8 special character
-        if (len(name) != 1) & (Idx < (len(name) - 1)):
-            # Detect UTF-8
-            if ((name[Idx] == '\xC2') | (name[Idx] == '\xC3')) & (
-                    (name[Idx + 1] >= '\xA0') & (name[Idx + 1] <= '\xFF')):
-                encoding = 'utf-8'
-                break
-            # Detect CP850
-            elif (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
-                encoding = 'cp850'
-                break
-            # Detect ISO-8859-15
-            elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
-                encoding = 'iso-8859-15'
-                break
-        else:
-            # Detect CP850
-            if (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
-                encoding = 'cp850'
-                break
-            # Detect ISO-8859-15
-            elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
-                encoding = 'iso-8859-15'
-                break
-    if encoding and not encoding == core.SYS_ENCODING:
-        encoded = True
-        name = name.decode(encoding).encode(core.SYS_ENCODING)
-    return encoded, name
-
-
-def convert_to_ascii(input_name, dir_name):
-
-    ascii_convert = int(core.CFG['ASCII']['convert'])
-    if ascii_convert == 0 or os.name == 'nt':  # just return if we don't want to convert or on windows os and '\' is replaced!.
-        return input_name, dir_name
-
-    encoded, input_name = char_replace(input_name)
-
-    directory, base = os.path.split(dir_name)
-    if not base:  # ended with '/'
-        directory, base = os.path.split(directory)
-
-    encoded, base2 = char_replace(base)
-    if encoded:
-        dir_name = os.path.join(directory, base2)
-        logger.info('Renaming directory to: {0}.'.format(base2), 'ENCODER')
-        os.rename(os.path.join(directory, base), dir_name)
-        if 'NZBOP_SCRIPTDIR' in os.environ:
-            print('[NZB] DIRECTORY={0}'.format(dir_name))
-
-    for dirname, dirnames, filenames in os.walk(dir_name, topdown=False):
-        for subdirname in dirnames:
-            encoded, subdirname2 = char_replace(subdirname)
-            if encoded:
-                logger.info('Renaming directory to: {0}.'.format(subdirname2), 'ENCODER')
-                os.rename(os.path.join(dirname, subdirname), os.path.join(dirname, subdirname2))
-
-    for dirname, dirnames, filenames in os.walk(dir_name):
-        for filename in filenames:
-            encoded, filename2 = char_replace(filename)
-            if encoded:
-                logger.info('Renaming file to: {0}.'.format(filename2), 'ENCODER')
-                os.rename(os.path.join(dirname, filename), os.path.join(dirname, filename2))
-
-    return input_name, dir_name
-
-
 def get_dirs(section, subsection, link='hard'):
     to_return = []
 
diff --git a/core/utils/encoding.py b/core/utils/encoding.py
new file mode 100644
index 00000000..ca19e054
--- /dev/null
+++ b/core/utils/encoding.py
@@ -0,0 +1,93 @@
+import os
+
+from six import text_type
+
+import core
+from core import logger
+
+
+def char_replace(name):
+    """Detect a legacy encoding in a byte-string name and convert it.
+
+    The first byte >= 0x80 found in ``name`` decides the encoding:
+
+    * UTF-8: lead byte 0xC2-0xC3 followed by a continuation byte 0x80-0xBF
+    * CP850: 0x80-0xA5 (fortunately not used in ISO-8859-15)
+    * ISO-8859-15: 0xA6-0xFF
+
+    :param name: file or directory name (text or byte string).
+    :return: tuple ``(encoded, name)``. ``encoded`` is True only when a byte
+        string was decoded from the detected encoding and re-encoded to
+        ``core.SYS_ENCODING``; text input is only encoded and reports False.
+    """
+    encoded = False
+    encoding = None
+    if isinstance(name, text_type):
+        return encoded, name.encode(core.SYS_ENCODING)
+    # bytearray yields integers on both Python 2 and Python 3, so the range
+    # checks below do not depend on how the interpreter indexes byte strings.
+    raw = bytearray(name)
+    last = len(raw) - 1
+    for idx, byte in enumerate(raw):
+        if byte < 0x80:
+            continue  # plain ASCII, keep scanning
+        # /!\ UTF-8 detection needs the following byte as well. Continuation
+        # bytes are limited to 0x80-0xBF; anything else after 0xC2/0xC3 is
+        # not UTF-8 and would make the decode below fail.
+        if idx < last and byte in (0xC2, 0xC3) and 0x80 <= raw[idx + 1] <= 0xBF:
+            encoding = 'utf-8'
+        elif byte <= 0xA5:
+            encoding = 'cp850'
+        else:
+            encoding = 'iso-8859-15'
+        break
+    if encoding and not encoding == core.SYS_ENCODING:
+        encoded = True
+        name = name.decode(encoding).encode(core.SYS_ENCODING)
+    return encoded, name
+
+
+def convert_to_ascii(input_name, dir_name):
+    """Rename ``dir_name`` and everything below it using ``char_replace``.
+
+    Returns the arguments untouched when the ASCII/convert setting is 0 or on Windows.
+
+    :param input_name: name of the download; converted with ``char_replace``.
+    :param dir_name: directory whose own name, sub-directories and files are
+        renamed on disk whenever ``char_replace`` reports a conversion.
+    :return: tuple ``(input_name, dir_name)`` with the possibly updated values.
+    """
+    ascii_convert = int(core.CFG['ASCII']['convert'])
+    if ascii_convert == 0 or os.name == 'nt':  # just return if we don't want to convert or on windows os and '\' is replaced!.
+        return input_name, dir_name
+
+    _, input_name = char_replace(input_name)
+
+    directory, base = os.path.split(dir_name)
+    if not base:  # ended with '/'
+        directory, base = os.path.split(directory)
+
+    encoded, base2 = char_replace(base)
+    if encoded:
+        dir_name = os.path.join(directory, base2)
+        logger.info('Renaming directory to: {0}.'.format(base2), 'ENCODER')
+        os.rename(os.path.join(directory, base), dir_name)
+        if 'NZBOP_SCRIPTDIR' in os.environ:
+            print('[NZB] DIRECTORY={0}'.format(dir_name))
+
+    # Bottom-up so a directory is renamed only after its contents were visited.
+    for dirname, dirnames, _ in os.walk(dir_name, topdown=False):
+        for subdirname in dirnames:
+            encoded, subdirname2 = char_replace(subdirname)
+            if encoded:
+                logger.info('Renaming directory to: {0}.'.format(subdirname2), 'ENCODER')
+                os.rename(os.path.join(dirname, subdirname), os.path.join(dirname, subdirname2))
+
+    for dirname, _, filenames in os.walk(dir_name):
+        for filename in filenames:
+            encoded, filename2 = char_replace(filename)
+            if encoded:
+                logger.info('Renaming file to: {0}.'.format(filename2), 'ENCODER')
+                os.rename(os.path.join(dirname, filename), os.path.join(dirname, filename2))
+
+    return input_name, dir_name