mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-07-12 16:22:53 -07:00
Refactor encoding utils to utils.encoding
This commit is contained in:
parent
9d43e0d60b
commit
6a9ff96e8c
2 changed files with 86 additions and 79 deletions
|
@ -19,6 +19,7 @@ import core
|
||||||
from core import extractor, logger
|
from core import extractor, logger
|
||||||
from core.utils import shutil_custom
|
from core.utils import shutil_custom
|
||||||
from core.utils.download_info import get_download_info, update_download_info_status
|
from core.utils.download_info import get_download_info, update_download_info_status
|
||||||
|
from core.utils.encoding import char_replace, convert_to_ascii
|
||||||
from core.utils.links import copy_link, replace_links
|
from core.utils.links import copy_link, replace_links
|
||||||
from core.utils.naming import clean_file_name, is_sample, sanitize_name
|
from core.utils.naming import clean_file_name, is_sample, sanitize_name
|
||||||
from core.utils.network import find_download, test_connection, wake_on_lan, wake_up
|
from core.utils.network import find_download, test_connection, wake_on_lan, wake_up
|
||||||
|
@ -183,85 +184,6 @@ def flatten(output_destination):
|
||||||
remove_empty_folders(output_destination) # Cleanup empty directories
|
remove_empty_folders(output_destination) # Cleanup empty directories
|
||||||
|
|
||||||
|
|
||||||
def char_replace(name):
    """Guess the legacy encoding of a byte-string name and re-encode it.

    Byte ranges used for detection (the first special byte found decides):
      * UTF-8: a lead byte 0xC2 or 0xC3 followed by a byte in 0xA0-0xFF
      * CP850: 0x80-0xA5 (not used by ISO-8859-15)
      * ISO-8859-15: 0xA6-0xFF

    :param name: file or directory name, as text or as a byte string.
    :return: tuple ``(encoded, name)``. ``encoded`` is True only when a
        byte-string name was detected as an encoding other than
        ``core.SYS_ENCODING`` and was converted; ``name`` is the (possibly
        converted) name as bytes in ``core.SYS_ENCODING``.
    """
    # Text input needs no detection; just hand back its encoded form.
    if isinstance(name, text_type):
        return False, name.encode(core.SYS_ENCODING)

    # bytearray yields integers on both Python 2 and Python 3, so the range
    # checks below behave the same everywhere (indexing bytes on Python 3
    # gives ints, which cannot be ordered against one-character strings).
    raw = bytearray(name)
    last = len(raw) - 1
    encoding = None
    for idx, byte in enumerate(raw):
        # UTF-8 detection needs a following byte, so it is skipped on the last one.
        if idx < last and byte in (0xC2, 0xC3) and 0xA0 <= raw[idx + 1] <= 0xFF:
            encoding = 'utf-8'
            break
        if 0x80 <= byte <= 0xA5:
            encoding = 'cp850'
            break
        if 0xA6 <= byte <= 0xFF:
            encoding = 'iso-8859-15'
            break

    if encoding and encoding != core.SYS_ENCODING:
        return True, name.decode(encoding).encode(core.SYS_ENCODING)
    return False, name
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_ascii(input_name, dir_name):
    """Re-encode a download's name and rename its directory tree to match.

    Every name is passed through ``char_replace``; entries it reports as
    converted are renamed on disk.

    :param input_name: name of the download.
    :param dir_name: path of the download directory.
    :return: tuple ``(input_name, dir_name)`` after conversion, or the
        arguments unchanged when conversion is disabled or running on Windows.
    """
    conversion_enabled = int(core.CFG['ASCII']['convert']) != 0
    # Just return if we don't want to convert, or on Windows where '\' is replaced.
    if not conversion_enabled or os.name == 'nt':
        return input_name, dir_name

    def rename_entry(parent, old_name, message):
        # Rename parent/old_name when char_replace reports a converted name.
        changed, new_name = char_replace(old_name)
        if changed:
            logger.info(message.format(new_name), 'ENCODER')
            os.rename(os.path.join(parent, old_name), os.path.join(parent, new_name))

    _, input_name = char_replace(input_name)

    parent_dir, leaf = os.path.split(dir_name)
    if not leaf:  # dir_name ended with '/'
        parent_dir, leaf = os.path.split(parent_dir)

    leaf_changed, new_leaf = char_replace(leaf)
    if leaf_changed:
        dir_name = os.path.join(parent_dir, new_leaf)
        logger.info('Renaming directory to: {0}.'.format(new_leaf), 'ENCODER')
        os.rename(os.path.join(parent_dir, leaf), dir_name)
        if 'NZBOP_SCRIPTDIR' in os.environ:
            # Report the new directory on stdout in the '[NZB] DIRECTORY=' form.
            print('[NZB] DIRECTORY={0}'.format(dir_name))

    # Sub-directories are handled bottom-up (topdown=False).
    for root, subdirs, _files in os.walk(dir_name, topdown=False):
        for subdir in subdirs:
            rename_entry(root, subdir, 'Renaming directory to: {0}.')

    # Files are handled in a second pass, once directories have been renamed.
    for root, _subdirs, files in os.walk(dir_name):
        for filename in files:
            rename_entry(root, filename, 'Renaming file to: {0}.')

    return input_name, dir_name
|
|
||||||
|
|
||||||
|
|
||||||
def get_dirs(section, subsection, link='hard'):
|
def get_dirs(section, subsection, link='hard'):
|
||||||
to_return = []
|
to_return = []
|
||||||
|
|
||||||
|
|
85
core/utils/encoding.py
Normal file
85
core/utils/encoding.py
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
from six import text_type
|
||||||
|
|
||||||
|
import core
|
||||||
|
from core import logger
|
||||||
|
|
||||||
|
|
||||||
|
def char_replace(name):
    """Guess the legacy encoding of a byte-string name and re-encode it.

    Byte ranges used for detection (the first special byte found decides):
      * UTF-8: a lead byte 0xC2 or 0xC3 followed by a byte in 0xA0-0xFF
      * CP850: 0x80-0xA5 (not used by ISO-8859-15)
      * ISO-8859-15: 0xA6-0xFF

    :param name: file or directory name, as text or as a byte string.
    :return: tuple ``(encoded, name)``. ``encoded`` is True only when a
        byte-string name was detected as an encoding other than
        ``core.SYS_ENCODING`` and was converted; ``name`` is the (possibly
        converted) name as bytes in ``core.SYS_ENCODING``.
    """
    # Text input needs no detection; just hand back its encoded form.
    if isinstance(name, text_type):
        return False, name.encode(core.SYS_ENCODING)

    # bytearray yields integers on both Python 2 and Python 3, so the range
    # checks below behave the same everywhere (indexing bytes on Python 3
    # gives ints, which cannot be ordered against one-character strings).
    raw = bytearray(name)
    last = len(raw) - 1
    encoding = None
    for idx, byte in enumerate(raw):
        # UTF-8 detection needs a following byte, so it is skipped on the last one.
        if idx < last and byte in (0xC2, 0xC3) and 0xA0 <= raw[idx + 1] <= 0xFF:
            encoding = 'utf-8'
            break
        if 0x80 <= byte <= 0xA5:
            encoding = 'cp850'
            break
        if 0xA6 <= byte <= 0xFF:
            encoding = 'iso-8859-15'
            break

    if encoding and encoding != core.SYS_ENCODING:
        return True, name.decode(encoding).encode(core.SYS_ENCODING)
    return False, name
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_ascii(input_name, dir_name):
    """Re-encode a download's name and rename its directory tree to match.

    Every name is passed through ``char_replace``; entries it reports as
    converted are renamed on disk.

    :param input_name: name of the download.
    :param dir_name: path of the download directory.
    :return: tuple ``(input_name, dir_name)`` after conversion, or the
        arguments unchanged when conversion is disabled or running on Windows.
    """
    conversion_enabled = int(core.CFG['ASCII']['convert']) != 0
    # Just return if we don't want to convert, or on Windows where '\' is replaced.
    if not conversion_enabled or os.name == 'nt':
        return input_name, dir_name

    def rename_entry(parent, old_name, message):
        # Rename parent/old_name when char_replace reports a converted name.
        changed, new_name = char_replace(old_name)
        if changed:
            logger.info(message.format(new_name), 'ENCODER')
            os.rename(os.path.join(parent, old_name), os.path.join(parent, new_name))

    _, input_name = char_replace(input_name)

    parent_dir, leaf = os.path.split(dir_name)
    if not leaf:  # dir_name ended with '/'
        parent_dir, leaf = os.path.split(parent_dir)

    leaf_changed, new_leaf = char_replace(leaf)
    if leaf_changed:
        dir_name = os.path.join(parent_dir, new_leaf)
        logger.info('Renaming directory to: {0}.'.format(new_leaf), 'ENCODER')
        os.rename(os.path.join(parent_dir, leaf), dir_name)
        if 'NZBOP_SCRIPTDIR' in os.environ:
            # Report the new directory on stdout in the '[NZB] DIRECTORY=' form.
            print('[NZB] DIRECTORY={0}'.format(dir_name))

    # Sub-directories are handled bottom-up (topdown=False).
    for root, subdirs, _files in os.walk(dir_name, topdown=False):
        for subdir in subdirs:
            rename_entry(root, subdir, 'Renaming directory to: {0}.')

    # Files are handled in a second pass, once directories have been renamed.
    for root, _subdirs, files in os.walk(dir_name):
        for filename in files:
            rename_entry(root, filename, 'Renaming file to: {0}.')

    return input_name, dir_name
|
Loading…
Add table
Add a link
Reference in a new issue