Update mediafile.py for Py3.13

This commit is contained in:
Clinton Hall 2024-10-19 21:30:49 +13:00 committed by GitHub
commit 9ecd91a6dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -33,8 +33,6 @@ Internally ``MediaFile`` uses ``MediaField`` descriptors to access the
data from the tags. In turn ``MediaField`` uses a number of data from the tags. In turn ``MediaField`` uses a number of
``StorageStyle`` strategies to handle format specific logic. ``StorageStyle`` strategies to handle format specific logic.
""" """
from __future__ import division, absolute_import, print_function
import mutagen import mutagen
import mutagen.id3 import mutagen.id3
import mutagen.mp3 import mutagen.mp3
@ -48,18 +46,17 @@ import binascii
import codecs import codecs
import datetime import datetime
import enum import enum
import filetype
import functools import functools
import imghdr
import logging import logging
import math import math
import os import os
import re import re
import six
import struct import struct
import traceback import traceback
__version__ = '0.10.1' __version__ = '0.13.0'
__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] __all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile']
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -81,8 +78,6 @@ TYPES = {
'wav': 'WAVE', 'wav': 'WAVE',
} }
PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'}
# Exceptions. # Exceptions.
@ -136,8 +131,8 @@ def mutagen_call(action, filename, func, *args, **kwargs):
try: try:
return func(*args, **kwargs) return func(*args, **kwargs)
except mutagen.MutagenError as exc: except mutagen.MutagenError as exc:
log.debug(u'%s failed: %s', action, six.text_type(exc)) log.debug(u'%s failed: %s', action, str(exc))
raise UnreadableFileError(filename, six.text_type(exc)) raise UnreadableFileError(filename, str(exc))
except UnreadableFileError: except UnreadableFileError:
# Reraise our errors without changes. # Reraise our errors without changes.
# Used in case of decorating functions (e.g. by `loadfile`). # Used in case of decorating functions (e.g. by `loadfile`).
@ -202,8 +197,8 @@ def _safe_cast(out_type, val):
# Process any other type as a string. # Process any other type as a string.
if isinstance(val, bytes): if isinstance(val, bytes):
val = val.decode('utf-8', 'ignore') val = val.decode('utf-8', 'ignore')
elif not isinstance(val, six.string_types): elif not isinstance(val, str):
val = six.text_type(val) val = str(val)
# Get a number from the front of the string. # Get a number from the front of the string.
match = re.match(r'[\+-]?[0-9]+', val.strip()) match = re.match(r'[\+-]?[0-9]+', val.strip())
return int(match.group(0)) if match else 0 return int(match.group(0)) if match else 0
@ -215,13 +210,13 @@ def _safe_cast(out_type, val):
except ValueError: except ValueError:
return False return False
elif out_type == six.text_type: elif out_type == str:
if isinstance(val, bytes): if isinstance(val, bytes):
return val.decode('utf-8', 'ignore') return val.decode('utf-8', 'ignore')
elif isinstance(val, six.text_type): elif isinstance(val, str):
return val return val
else: else:
return six.text_type(val) return str(val)
elif out_type == float: elif out_type == float:
if isinstance(val, int) or isinstance(val, float): if isinstance(val, int) or isinstance(val, float):
@ -230,7 +225,7 @@ def _safe_cast(out_type, val):
if isinstance(val, bytes): if isinstance(val, bytes):
val = val.decode('utf-8', 'ignore') val = val.decode('utf-8', 'ignore')
else: else:
val = six.text_type(val) val = str(val)
match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)',
val.strip()) val.strip())
if match: if match:
@ -289,7 +284,7 @@ def _sc_decode(soundcheck):
""" """
# We decode binary data. If one of the formats gives us a text # We decode binary data. If one of the formats gives us a text
# string, interpret it as UTF-8. # string, interpret it as UTF-8.
if isinstance(soundcheck, six.text_type): if isinstance(soundcheck, str):
soundcheck = soundcheck.encode('utf-8') soundcheck = soundcheck.encode('utf-8')
# SoundCheck tags consist of 10 numbers, each represented by 8 # SoundCheck tags consist of 10 numbers, each represented by 8
@ -349,52 +344,15 @@ def _sc_encode(gain, peak):
# Cover art and other images. # Cover art and other images.
def _imghdr_what_wrapper(data):
"""A wrapper around imghdr.what to account for jpeg files that can only be
identified as such using their magic bytes
See #1545
See https://github.com/file/file/blob/master/magic/Magdir/jpeg#L12
"""
# imghdr.what returns none for jpegs with only the magic bytes, so
# _wider_test_jpeg is run in that case. It still returns None if it didn't
# match such a jpeg file.
return imghdr.what(None, h=data) or _wider_test_jpeg(data)
def _wider_test_jpeg(data):
"""Test for a jpeg file following the UNIX file implementation which
uses the magic bytes rather than just looking for the bytes that
represent 'JFIF' or 'EXIF' at a fixed position.
"""
if data[:2] == b'\xff\xd8':
return 'jpeg'
def image_mime_type(data): def image_mime_type(data):
"""Return the MIME type of the image data (a bytestring). """Return the MIME type of the image data (a bytestring).
""" """
# This checks for a jpeg file with only the magic bytes (unrecognized by return filetype.guess_mime(data)
# imghdr.what). imghdr.what returns none for that type of file, so
# _wider_test_jpeg is run in that case. It still returns None if it didn't
# match such a jpeg file.
kind = _imghdr_what_wrapper(data)
if kind in ['gif', 'jpeg', 'png', 'tiff', 'bmp']:
return 'image/{0}'.format(kind)
elif kind == 'pgm':
return 'image/x-portable-graymap'
elif kind == 'pbm':
return 'image/x-portable-bitmap'
elif kind == 'ppm':
return 'image/x-portable-pixmap'
elif kind == 'xbm':
return 'image/x-xbitmap'
else:
return 'image/x-{0}'.format(kind)
def image_extension(data): def image_extension(data):
ext = _imghdr_what_wrapper(data) return filetype.guess_extension(data)
return PREFERRED_IMAGE_EXTENSIONS.get(ext, ext)
class ImageType(enum.Enum): class ImageType(enum.Enum):
@ -437,7 +395,7 @@ class Image(object):
def __init__(self, data, desc=None, type=None): def __init__(self, data, desc=None, type=None):
assert isinstance(data, bytes) assert isinstance(data, bytes)
if desc is not None: if desc is not None:
assert isinstance(desc, six.text_type) assert isinstance(desc, str)
self.data = data self.data = data
self.desc = desc self.desc = desc
if isinstance(type, int): if isinstance(type, int):
@ -495,7 +453,7 @@ class StorageStyle(object):
"""List of mutagen classes the StorageStyle can handle. """List of mutagen classes the StorageStyle can handle.
""" """
def __init__(self, key, as_type=six.text_type, suffix=None, def __init__(self, key, as_type=str, suffix=None,
float_places=2, read_only=False): float_places=2, read_only=False):
"""Create a basic storage strategy. Parameters: """Create a basic storage strategy. Parameters:
@ -520,8 +478,8 @@ class StorageStyle(object):
self.read_only = read_only self.read_only = read_only
# Convert suffix to correct string type. # Convert suffix to correct string type.
if self.suffix and self.as_type is six.text_type \ if self.suffix and self.as_type is str \
and not isinstance(self.suffix, six.text_type): and not isinstance(self.suffix, str):
self.suffix = self.suffix.decode('utf-8') self.suffix = self.suffix.decode('utf-8')
# Getter. # Getter.
@ -544,7 +502,7 @@ class StorageStyle(object):
"""Given a raw value stored on a Mutagen object, decode and """Given a raw value stored on a Mutagen object, decode and
return the represented value. return the represented value.
""" """
if self.suffix and isinstance(mutagen_value, six.text_type) \ if self.suffix and isinstance(mutagen_value, str) \
and mutagen_value.endswith(self.suffix): and mutagen_value.endswith(self.suffix):
return mutagen_value[:-len(self.suffix)] return mutagen_value[:-len(self.suffix)]
else: else:
@ -566,17 +524,17 @@ class StorageStyle(object):
"""Convert the external Python value to a type that is suitable for """Convert the external Python value to a type that is suitable for
storing in a Mutagen file object. storing in a Mutagen file object.
""" """
if isinstance(value, float) and self.as_type is six.text_type: if isinstance(value, float) and self.as_type is str:
value = u'{0:.{1}f}'.format(value, self.float_places) value = u'{0:.{1}f}'.format(value, self.float_places)
value = self.as_type(value) value = self.as_type(value)
elif self.as_type is six.text_type: elif self.as_type is str:
if isinstance(value, bool): if isinstance(value, bool):
# Store bools as 1/0 instead of True/False. # Store bools as 1/0 instead of True/False.
value = six.text_type(int(bool(value))) value = str(int(bool(value)))
elif isinstance(value, bytes): elif isinstance(value, bytes):
value = value.decode('utf-8', 'ignore') value = value.decode('utf-8', 'ignore')
else: else:
value = six.text_type(value) value = str(value)
else: else:
value = self.as_type(value) value = self.as_type(value)
@ -600,8 +558,8 @@ class ListStorageStyle(StorageStyle):
object to each. object to each.
Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must
return a (possibly empty) list and ``store`` receives a serialized return a (possibly empty) list or `None` if the tag does not exist.
list of values as the second argument. ``store`` receives a serialized list of values as the second argument.
The `serialize` and `deserialize` methods (from the base The `serialize` and `deserialize` methods (from the base
`StorageStyle`) are still called with individual values. This class `StorageStyle`) are still called with individual values. This class
@ -610,15 +568,23 @@ class ListStorageStyle(StorageStyle):
def get(self, mutagen_file): def get(self, mutagen_file):
"""Get the first value in the field's value list. """Get the first value in the field's value list.
""" """
values = self.get_list(mutagen_file)
if values is None:
return None
try: try:
return self.get_list(mutagen_file)[0] return values[0]
except IndexError: except IndexError:
return None return None
def get_list(self, mutagen_file): def get_list(self, mutagen_file):
"""Get a list of all values for the field using this style. """Get a list of all values for the field using this style.
""" """
return [self.deserialize(item) for item in self.fetch(mutagen_file)] raw_values = self.fetch(mutagen_file)
if raw_values is None:
return None
return [self.deserialize(item) for item in raw_values]
def fetch(self, mutagen_file): def fetch(self, mutagen_file):
"""Get the list of raw (serialized) values. """Get the list of raw (serialized) values.
@ -626,19 +592,27 @@ class ListStorageStyle(StorageStyle):
try: try:
return mutagen_file[self.key] return mutagen_file[self.key]
except KeyError: except KeyError:
return [] return None
def set(self, mutagen_file, value): def set(self, mutagen_file, value):
"""Set an individual value as the only value for the field using """Set an individual value as the only value for the field using
this style. this style.
""" """
if value is None:
self.store(mutagen_file, None)
else:
self.set_list(mutagen_file, [value]) self.set_list(mutagen_file, [value])
def set_list(self, mutagen_file, values): def set_list(self, mutagen_file, values):
"""Set all values for the field using this style. `values` """Set all values for the field using this style. `values`
should be an iterable. should be an iterable.
""" """
self.store(mutagen_file, [self.serialize(value) for value in values]) if values is None:
self.delete(mutagen_file)
else:
self.store(
mutagen_file, [self.serialize(value) for value in values]
)
def store(self, mutagen_file, values): def store(self, mutagen_file, values):
"""Set the list of all raw (serialized) values for this field. """Set the list of all raw (serialized) values for this field.
@ -686,7 +660,7 @@ class MP4StorageStyle(StorageStyle):
def serialize(self, value): def serialize(self, value):
value = super(MP4StorageStyle, self).serialize(value) value = super(MP4StorageStyle, self).serialize(value)
if self.key.startswith('----:') and isinstance(value, six.text_type): if self.key.startswith('----:') and isinstance(value, str):
value = value.encode('utf-8') value = value.encode('utf-8')
return value return value
@ -865,7 +839,7 @@ class MP3UFIDStorageStyle(MP3StorageStyle):
def store(self, mutagen_file, value): def store(self, mutagen_file, value):
# This field type stores text data as encoded data. # This field type stores text data as encoded data.
assert isinstance(value, six.text_type) assert isinstance(value, str)
value = value.encode('utf-8') value = value.encode('utf-8')
frames = mutagen_file.tags.getall(self.key) frames = mutagen_file.tags.getall(self.key)
@ -889,7 +863,7 @@ class MP3DescStorageStyle(MP3StorageStyle):
""" """
def __init__(self, desc=u'', key='TXXX', attr='text', multispec=True, def __init__(self, desc=u'', key='TXXX', attr='text', multispec=True,
**kwargs): **kwargs):
assert isinstance(desc, six.text_type) assert isinstance(desc, str)
self.description = desc self.description = desc
self.attr = attr self.attr = attr
self.multispec = multispec self.multispec = multispec
@ -978,7 +952,7 @@ class MP3SlashPackStorageStyle(MP3StorageStyle):
def _fetch_unpacked(self, mutagen_file): def _fetch_unpacked(self, mutagen_file):
data = self.fetch(mutagen_file) data = self.fetch(mutagen_file)
if data: if data:
items = six.text_type(data).split('/') items = str(data).split('/')
else: else:
items = [] items = []
packing_length = 2 packing_length = 2
@ -994,7 +968,7 @@ class MP3SlashPackStorageStyle(MP3StorageStyle):
items[0] = '' items[0] = ''
if items[1] is None: if items[1] is None:
items.pop() # Do not store last value items.pop() # Do not store last value
self.store(mutagen_file, '/'.join(map(six.text_type, items))) self.store(mutagen_file, '/'.join(map(str, items)))
def delete(self, mutagen_file): def delete(self, mutagen_file):
if self.pack_pos == 0: if self.pack_pos == 0:
@ -1261,7 +1235,7 @@ class MediaField(object):
getting this property. getting this property.
""" """
self.out_type = kwargs.get('out_type', six.text_type) self.out_type = kwargs.get('out_type', str)
self._styles = styles self._styles = styles
def styles(self, mutagen_file): def styles(self, mutagen_file):
@ -1301,7 +1275,7 @@ class MediaField(object):
return 0.0 return 0.0
elif self.out_type == bool: elif self.out_type == bool:
return False return False
elif self.out_type == six.text_type: elif self.out_type == str:
return u'' return u''
@ -1317,7 +1291,7 @@ class ListMediaField(MediaField):
values = style.get_list(mediafile.mgfile) values = style.get_list(mediafile.mgfile)
if values: if values:
return [_safe_cast(self.out_type, value) for value in values] return [_safe_cast(self.out_type, value) for value in values]
return [] return None
def __set__(self, mediafile, values): def __set__(self, mediafile, values):
for style in self.styles(mediafile.mgfile): for style in self.styles(mediafile.mgfile):
@ -1384,9 +1358,9 @@ class DateField(MediaField):
""" """
# Get the underlying data and split on hyphens and slashes. # Get the underlying data and split on hyphens and slashes.
datestring = super(DateField, self).__get__(mediafile, None) datestring = super(DateField, self).__get__(mediafile, None)
if isinstance(datestring, six.string_types): if isinstance(datestring, str):
datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) datestring = re.sub(r'[Tt ].*$', '', str(datestring))
items = re.split('[-/]', six.text_type(datestring)) items = re.split('[-/]', str(datestring))
else: else:
items = [] items = []
@ -1423,7 +1397,7 @@ class DateField(MediaField):
date.append(u'{0:02d}'.format(int(month))) date.append(u'{0:02d}'.format(int(month)))
if month and day: if month and day:
date.append(u'{0:02d}'.format(int(day))) date.append(u'{0:02d}'.format(int(day)))
date = map(six.text_type, date) date = map(str, date)
super(DateField, self).__set__(mediafile, u'-'.join(date)) super(DateField, self).__set__(mediafile, u'-'.join(date))
if hasattr(self, '_year_field'): if hasattr(self, '_year_field'):
@ -2071,6 +2045,7 @@ class MediaFile(object):
original_date = DateField( original_date = DateField(
MP3StorageStyle('TDOR'), MP3StorageStyle('TDOR'),
MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'),
MP4StorageStyle('----:com.apple.iTunes:ORIGINALDATE'),
StorageStyle('ORIGINALDATE'), StorageStyle('ORIGINALDATE'),
ASFStorageStyle('WM/OriginalReleaseYear')) ASFStorageStyle('WM/OriginalReleaseYear'))
@ -2085,12 +2060,36 @@ class MediaFile(object):
StorageStyle('ARTIST_CREDIT'), StorageStyle('ARTIST_CREDIT'),
ASFStorageStyle('beets/Artist Credit'), ASFStorageStyle('beets/Artist Credit'),
) )
artists_credit = ListMediaField(
MP3ListDescStorageStyle(desc=u'ARTISTS_CREDIT'),
MP4ListStorageStyle('----:com.apple.iTunes:ARTISTS_CREDIT'),
ListStorageStyle('ARTISTS_CREDIT'),
ASFStorageStyle('beets/ArtistsCredit'),
)
artists_sort = ListMediaField(
MP3ListDescStorageStyle(desc=u'ARTISTS_SORT'),
MP4ListStorageStyle('----:com.apple.iTunes:ARTISTS_SORT'),
ListStorageStyle('ARTISTS_SORT'),
ASFStorageStyle('beets/ArtistsSort'),
)
albumartist_credit = MediaField( albumartist_credit = MediaField(
MP3DescStorageStyle(u'Album Artist Credit'), MP3DescStorageStyle(u'Album Artist Credit'),
MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'),
StorageStyle('ALBUMARTIST_CREDIT'), StorageStyle('ALBUMARTIST_CREDIT'),
ASFStorageStyle('beets/Album Artist Credit'), ASFStorageStyle('beets/Album Artist Credit'),
) )
albumartists_credit = ListMediaField(
MP3ListDescStorageStyle(desc=u'ALBUMARTISTS_CREDIT'),
MP4ListStorageStyle('----:com.apple.iTunes:ALBUMARTISTS_CREDIT'),
ListStorageStyle('ALBUMARTISTS_CREDIT'),
ASFStorageStyle('beets/AlbumArtistsCredit'),
)
albumartists_sort = ListMediaField(
MP3ListDescStorageStyle(desc=u'ALBUMARTISTS_SORT'),
MP4ListStorageStyle('----:com.apple.iTunes:ALBUMARTISTS_SORT'),
ListStorageStyle('ALBUMARTISTS_SORT'),
ASFStorageStyle('beets/AlbumArtistsSort'),
)
# Legacy album art field # Legacy album art field
art = CoverArtField() art = CoverArtField()