diff --git a/libs/_yaml.cp37-win32.pyd b/libs/_yaml.cp37-win32.pyd new file mode 100644 index 00000000..fdfc7112 Binary files /dev/null and b/libs/_yaml.cp37-win32.pyd differ diff --git a/libs/beets/__init__.py b/libs/beets/__init__.py index 830477a9..b8fe2a84 100644 --- a/libs/beets/__init__.py +++ b/libs/beets/__init__.py @@ -19,7 +19,7 @@ import os from beets.util import confit -__version__ = u'1.3.18' +__version__ = u'1.4.7' __author__ = u'Adrian Sampson ' diff --git a/libs/beets/__main__.py b/libs/beets/__main__.py new file mode 100644 index 00000000..8010ca0d --- /dev/null +++ b/libs/beets/__main__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2017, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""The __main__ module lets you run the beets CLI interface by typing +`python -m beets`. 
+""" + +from __future__ import division, absolute_import, print_function + +import sys +from .ui import main + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/libs/beets/art.py b/libs/beets/art.py index 7a65a2b8..979a6f72 100644 --- a/libs/beets/art.py +++ b/libs/beets/art.py @@ -22,10 +22,9 @@ from __future__ import division, absolute_import, print_function import subprocess import platform from tempfile import NamedTemporaryFile -import imghdr import os -from beets.util import displayable_path, syspath +from beets.util import displayable_path, syspath, bytestring_path from beets.util.artresizer import ArtResizer from beets import mediafile @@ -124,26 +123,49 @@ def check_art_similarity(log, item, imagepath, compare_threshold): is_windows = platform.system() == "Windows" # Converting images to grayscale tends to minimize the weight - # of colors in the diff score. + # of colors in the diff score. So we first convert both images + # to grayscale and then pipe them into the `compare` command. + # On Windows, ImageMagick doesn't support the magic \\?\ prefix + # on paths, so we pass `prefix=False` to `syspath`. + convert_cmd = ['convert', syspath(imagepath, prefix=False), + syspath(art, prefix=False), + '-colorspace', 'gray', 'MIFF:-'] + compare_cmd = ['compare', '-metric', 'PHASH', '-', 'null:'] + log.debug(u'comparing images with pipeline {} | {}', + convert_cmd, compare_cmd) convert_proc = subprocess.Popen( - [b'convert', syspath(imagepath), syspath(art), - b'-colorspace', b'gray', b'MIFF:-'], + convert_cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, close_fds=not is_windows, ) compare_proc = subprocess.Popen( - [b'compare', b'-metric', b'PHASH', b'-', b'null:'], + compare_cmd, stdin=convert_proc.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=not is_windows, ) - convert_proc.stdout.close() + # Check the convert output. We're not interested in the + # standard output; that gets piped to the next stage. 
+ convert_proc.stdout.close() + convert_stderr = convert_proc.stderr.read() + convert_proc.stderr.close() + convert_proc.wait() + if convert_proc.returncode: + log.debug( + u'ImageMagick convert failed with status {}: {!r}', + convert_proc.returncode, + convert_stderr, + ) + return + + # Check the compare output. stdout, stderr = compare_proc.communicate() if compare_proc.returncode: if compare_proc.returncode != 1: - log.debug(u'IM phashes compare failed for {0}, {1}', + log.debug(u'ImageMagick compare failed: {0}, {1}', displayable_path(imagepath), displayable_path(art)) return @@ -157,7 +179,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): log.debug(u'IM output is not a number: {0!r}', out_str) return - log.debug(u'compare PHASH score is {0}', phash_diff) + log.debug(u'ImageMagick compare score: {0}', phash_diff) return phash_diff <= compare_threshold return True @@ -165,18 +187,18 @@ def check_art_similarity(log, item, imagepath, compare_threshold): def extract(log, outpath, item): art = get_art(log, item) - + outpath = bytestring_path(outpath) if not art: log.info(u'No album art present in {0}, skipping.', item) return # Add an extension to the filename. - ext = imghdr.what(None, h=art) + ext = mediafile.image_extension(art) if not ext: log.warning(u'Unknown image type in {0}.', displayable_path(item.path)) return - outpath += b'.' + ext + outpath += bytestring_path('.' + ext) log.info(u'Extracting album art from: {0} to: {1}', item, displayable_path(outpath)) diff --git a/libs/beets/autotag/__init__.py b/libs/beets/autotag/__init__.py index f8233be6..c4ee1300 100644 --- a/libs/beets/autotag/__init__.py +++ b/libs/beets/autotag/__init__.py @@ -23,7 +23,7 @@ from beets import config # Parts of external interface. 
from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa -from .match import tag_item, tag_album # noqa +from .match import tag_item, tag_album, Proposal # noqa from .match import Recommendation # noqa # Global logger. @@ -40,10 +40,21 @@ def apply_item_metadata(item, track_info): item.artist_credit = track_info.artist_credit item.title = track_info.title item.mb_trackid = track_info.track_id + item.mb_releasetrackid = track_info.release_track_id if track_info.artist_id: item.mb_artistid = track_info.artist_id if track_info.data_source: item.data_source = track_info.data_source + + if track_info.lyricist is not None: + item.lyricist = track_info.lyricist + if track_info.composer is not None: + item.composer = track_info.composer + if track_info.composer_sort is not None: + item.composer_sort = track_info.composer_sort + if track_info.arranger is not None: + item.arranger = track_info.arranger + # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? @@ -52,13 +63,20 @@ def apply_metadata(album_info, mapping): """Set the items' metadata to match an AlbumInfo object using a mapping from Items to TrackInfo objects. """ - for item, track_info in mapping.iteritems(): - # Album, artist, track count. - if track_info.artist: - item.artist = track_info.artist + for item, track_info in mapping.items(): + # Artist or artist credit. + if config['artist_credit']: + item.artist = (track_info.artist_credit or + track_info.artist or + album_info.artist_credit or + album_info.artist) + item.albumartist = (album_info.artist_credit or + album_info.artist) else: - item.artist = album_info.artist - item.albumartist = album_info.artist + item.artist = (track_info.artist or album_info.artist) + item.albumartist = album_info.artist + + # Album. item.album = album_info.album # Artist sort and credit names. 
@@ -97,8 +115,9 @@ def apply_metadata(album_info, mapping): if config['per_disc_numbering']: # We want to let the track number be zero, but if the medium index # is not provided we need to fall back to the overall index. - item.track = track_info.medium_index - if item.track is None: + if track_info.medium_index is not None: + item.track = track_info.medium_index + else: item.track = track_info.index item.tracktotal = track_info.medium_total or len(album_info.tracks) else: @@ -111,6 +130,7 @@ def apply_metadata(album_info, mapping): # MusicBrainz IDs. item.mb_trackid = track_info.track_id + item.mb_releasetrackid = track_info.release_track_id item.mb_albumid = album_info.album_id if track_info.artist_id: item.mb_artistid = track_info.artist_id @@ -141,3 +161,14 @@ def apply_metadata(album_info, mapping): if track_info.media is not None: item.media = track_info.media + + if track_info.lyricist is not None: + item.lyricist = track_info.lyricist + if track_info.composer is not None: + item.composer = track_info.composer + if track_info.composer_sort is not None: + item.composer_sort = track_info.composer_sort + if track_info.arranger is not None: + item.arranger = track_info.arranger + + item.track_alt = track_info.track_alt diff --git a/libs/beets/autotag/hooks.py b/libs/beets/autotag/hooks.py index 3de80389..3615a933 100644 --- a/libs/beets/autotag/hooks.py +++ b/libs/beets/autotag/hooks.py @@ -17,14 +17,17 @@ from __future__ import division, absolute_import, print_function from collections import namedtuple +from functools import total_ordering import re from beets import logging from beets import plugins from beets import config +from beets.util import as_string from beets.autotag import mb from jellyfish import levenshtein_distance from unidecode import unidecode +import six log = logging.getLogger('beets') @@ -104,7 +107,7 @@ class AlbumInfo(object): # Work around a bug in python-musicbrainz-ngs that causes some # strings to be bytes rather than Unicode. 
# https://github.com/alastair/python-musicbrainz-ngs/issues/85 - def decode(self, codec='utf8'): + def decode(self, codec='utf-8'): """Ensure that all string attributes on this object, and the constituent `TrackInfo` objects, are decoded to Unicode. """ @@ -126,6 +129,8 @@ class TrackInfo(object): - ``title``: name of the track - ``track_id``: MusicBrainz ID; UUID fragment only + - ``release_track_id``: MusicBrainz ID respective to a track on a + particular release; UUID fragment only - ``artist``: individual track artist name - ``artist_id`` - ``length``: float: duration of the track in seconds @@ -139,18 +144,25 @@ class TrackInfo(object): - ``artist_credit``: Recording-specific artist name - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - ``data_url``: The data source release URL. + - ``lyricist``: individual track lyricist name + - ``composer``: individual track composer name + - ``composer_sort``: individual track composer sort name + - ``arranger`: individual track arranger name + - ``track_alt``: alternative track number (tape, vinyl, etc.) Only ``title`` and ``track_id`` are required. The rest of the fields may be None. The indices ``index``, ``medium``, and ``medium_index`` are all 1-based. 
""" - def __init__(self, title, track_id, artist=None, artist_id=None, - length=None, index=None, medium=None, medium_index=None, - medium_total=None, artist_sort=None, disctitle=None, - artist_credit=None, data_source=None, data_url=None, - media=None): + def __init__(self, title, track_id, release_track_id=None, artist=None, + artist_id=None, length=None, index=None, medium=None, + medium_index=None, medium_total=None, artist_sort=None, + disctitle=None, artist_credit=None, data_source=None, + data_url=None, media=None, lyricist=None, composer=None, + composer_sort=None, arranger=None, track_alt=None): self.title = title self.track_id = track_id + self.release_track_id = release_track_id self.artist = artist self.artist_id = artist_id self.length = length @@ -164,9 +176,14 @@ class TrackInfo(object): self.artist_credit = artist_credit self.data_source = data_source self.data_url = data_url + self.lyricist = lyricist + self.composer = composer + self.composer_sort = composer_sort + self.arranger = arranger + self.track_alt = track_alt # As above, work around a bug in python-musicbrainz-ngs. - def decode(self, codec='utf8'): + def decode(self, codec='utf-8'): """Ensure that all string attributes on this object are decoded to Unicode. """ @@ -203,10 +220,10 @@ def _string_dist_basic(str1, str2): transliteration/lowering to ASCII characters. Normalized by string length. 
""" - assert isinstance(str1, unicode) - assert isinstance(str2, unicode) - str1 = unidecode(str1).decode('ascii') - str2 = unidecode(str2).decode('ascii') + assert isinstance(str1, six.text_type) + assert isinstance(str2, six.text_type) + str1 = as_string(unidecode(str1)) + str2 = as_string(unidecode(str2)) str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) if not str1 and not str2: @@ -288,6 +305,8 @@ class LazyClassProperty(object): return self.value +@total_ordering +@six.python_2_unicode_compatible class Distance(object): """Keeps track of multiple distance penalties. Provides a single weighted distance for all penalties as well as a weighted distance @@ -323,7 +342,7 @@ class Distance(object): """Return the maximum distance penalty (normalization factor). """ dist_max = 0.0 - for key, penalty in self._penalties.iteritems(): + for key, penalty in self._penalties.items(): dist_max += len(penalty) * self._weights[key] return dist_max @@ -332,7 +351,7 @@ class Distance(object): """Return the raw (denormalized) distance. """ dist_raw = 0.0 - for key, penalty in self._penalties.iteritems(): + for key, penalty in self._penalties.items(): dist_raw += sum(penalty) * self._weights[key] return dist_raw @@ -354,10 +373,16 @@ class Distance(object): key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) ) + def __hash__(self): + return id(self) + + def __eq__(self, other): + return self.distance == other + # Behave like a float. - def __cmp__(self, other): - return cmp(self.distance, other) + def __lt__(self, other): + return self.distance < other def __float__(self): return self.distance @@ -368,7 +393,7 @@ class Distance(object): def __rsub__(self, other): return other - self.distance - def __unicode__(self): + def __str__(self): return "{0:.2f}".format(self.distance) # Behave like a dict. 
@@ -398,7 +423,7 @@ class Distance(object): raise ValueError( u'`dist` must be a Distance object, not {0}'.format(type(dist)) ) - for key, penalties in dist._penalties.iteritems(): + for key, penalties in dist._penalties.items(): self._penalties.setdefault(key, []).extend(penalties) # Adding components. @@ -537,24 +562,27 @@ def track_for_mbid(recording_id): def albums_for_id(album_id): """Get a list of albums for an ID.""" - candidates = [album_for_mbid(album_id)] - plugin_albums = plugins.album_for_id(album_id) - for a in plugin_albums: - plugins.send(u'albuminfo_received', info=a) - candidates.extend(plugin_albums) - return filter(None, candidates) + a = album_for_mbid(album_id) + if a: + yield a + for a in plugins.album_for_id(album_id): + if a: + plugins.send(u'albuminfo_received', info=a) + yield a def tracks_for_id(track_id): """Get a list of tracks for an ID.""" - candidates = [track_for_mbid(track_id)] - plugin_tracks = plugins.track_for_id(track_id) - for t in plugin_tracks: - plugins.send(u'trackinfo_received', info=t) - candidates.extend(plugin_tracks) - return filter(None, candidates) + t = track_for_mbid(track_id) + if t: + yield t + for t in plugins.track_for_id(track_id): + if t: + plugins.send(u'trackinfo_received', info=t) + yield t +@plugins.notify_info_yielded(u'albuminfo_received') def album_candidates(items, artist, album, va_likely): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective @@ -562,51 +590,42 @@ def album_candidates(items, artist, album, va_likely): entered by the user. ``va_likely`` is a boolean indicating whether the album is likely to be a "various artists" release. """ - out = [] - # Base candidates if we have album and artist to match. 
if artist and album: try: - out.extend(mb.match_album(artist, album, len(items))) + for candidate in mb.match_album(artist, album, len(items)): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Also add VA matches from MusicBrainz where appropriate. if va_likely and album: try: - out.extend(mb.match_album(None, album, len(items))) + for candidate in mb.match_album(None, album, len(items)): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Candidates from plugins. - out.extend(plugins.candidates(items, artist, album, va_likely)) - - # Notify subscribed plugins about fetched album info - for a in out: - plugins.send(u'albuminfo_received', info=a) - - return out + for candidate in plugins.candidates(items, artist, album, va_likely): + yield candidate +@plugins.notify_info_yielded(u'trackinfo_received') def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or are specified by the user. """ - out = [] # MusicBrainz candidates. if artist and title: try: - out.extend(mb.match_track(artist, title)) + for candidate in mb.match_track(artist, title): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Plugin candidates. 
- out.extend(plugins.item_candidates(item, artist, title)) - - # Notify subscribed plugins about fetched track info - for i in out: - plugins.send(u'trackinfo_received', info=i) - - return out + for candidate in plugins.item_candidates(item, artist, title): + yield candidate diff --git a/libs/beets/autotag/match.py b/libs/beets/autotag/match.py index cfe184e7..71b62adb 100644 --- a/libs/beets/autotag/match.py +++ b/libs/beets/autotag/match.py @@ -22,6 +22,7 @@ from __future__ import division, absolute_import, print_function import datetime import re from munkres import Munkres +from collections import namedtuple from beets import logging from beets import plugins @@ -29,7 +30,6 @@ from beets import config from beets.util import plurality from beets.autotag import hooks from beets.util.enumeration import OrderedEnum -from functools import reduce # Artist signals that indicate "various artists". These are used at the # album level to determine whether a given release is likely a VA @@ -53,6 +53,13 @@ class Recommendation(OrderedEnum): strong = 3 +# A structure for holding a set of possible matches to choose between. This +# consists of a list of possible candidates (i.e., AlbumInfo or TrackInfo +# objects) and a recommendation value. + +Proposal = namedtuple('Proposal', ('candidates', 'recommendation')) + + # Primary matching functionality. def current_metadata(items): @@ -96,7 +103,9 @@ def assign_items(items, tracks): costs.append(row) # Find a minimum-cost bipartite matching. + log.debug('Computing track assignment...') matching = Munkres().compute(costs) + log.debug('...done.') # Produce the output matching. mapping = dict((items[i], tracks[j]) for (i, j) in matching) @@ -238,7 +247,7 @@ def distance(items, album_info, mapping): # Tracks. 
dist.tracks = {} - for item, track in mapping.iteritems(): + for item, track in mapping.items(): dist.tracks[track] = track_distance(item, track, album_info.va) dist.add('tracks', dist.tracks[track].distance) @@ -261,19 +270,23 @@ def match_by_id(items): AlbumInfo object for the corresponding album. Otherwise, returns None. """ - # Is there a consensus on the MB album ID? - albumids = [item.mb_albumid for item in items if item.mb_albumid] - if not albumids: - log.debug(u'No album IDs found.') + albumids = (item.mb_albumid for item in items if item.mb_albumid) + + # Did any of the items have an MB album ID? + try: + first = next(albumids) + except StopIteration: + log.debug(u'No album ID found.') return None + # Is there a consensus on the MB album ID? + for other in albumids: + if other != first: + log.debug(u'No album ID consensus.') + return None # If all album IDs are equal, look up the album. - if bool(reduce(lambda x, y: x if x == y else (), albumids)): - albumid = albumids[0] - log.debug(u'Searching for discovered album ID: {0}', albumid) - return hooks.album_for_mbid(albumid) - else: - log.debug(u'No album ID consensus.') + log.debug(u'Searching for discovered album ID: {0}', first) + return hooks.album_for_mbid(first) def _recommendation(results): @@ -312,10 +325,10 @@ def _recommendation(results): keys = set(min_dist.keys()) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(track_dist.keys()) + keys.update(list(track_dist.keys())) max_rec_view = config['match']['max_rec'] for key in keys: - if key in max_rec_view.keys(): + if key in list(max_rec_view.keys()): max_rec = max_rec_view[key].as_choice({ 'strong': Recommendation.strong, 'medium': Recommendation.medium, @@ -327,13 +340,19 @@ def _recommendation(results): return rec +def _sort_candidates(candidates): + """Sort candidates by distance.""" + return sorted(candidates, key=lambda match: match.distance) + + def _add_candidate(items, results, info): 
"""Given a candidate AlbumInfo object, attempt to add the candidate to the output dictionary of AlbumMatch objects. This involves checking the track count, ordering the items, checking for duplicates, and calculating the distance. """ - log.debug(u'Candidate: {0} - {1}', info.artist, info.album) + log.debug(u'Candidate: {0} - {1} ({2})', + info.artist, info.album, info.album_id) # Discard albums with zero tracks. if not info.tracks: @@ -371,9 +390,8 @@ def _add_candidate(items, results, info): def tag_album(items, search_artist=None, search_album=None, search_ids=[]): - """Return a tuple of a artist name, an album name, a list of - `AlbumMatch` candidates from the metadata backend, and a - `Recommendation`. + """Return a tuple of the current artist name, the current album + name, and a `Proposal` containing `AlbumMatch` candidates. The artist and album are the most common values of these fields among `items`. @@ -401,10 +419,10 @@ def tag_album(items, search_artist=None, search_album=None, # Search by explicit ID. if search_ids: - search_cands = [] for search_id in search_ids: log.debug(u'Searching for album ID: {0}', search_id) - search_cands.extend(hooks.albums_for_id(search_id)) + for id_candidate in hooks.albums_for_id(search_id): + _add_candidate(items, candidates, id_candidate) # Use existing metadata or text search. else: @@ -412,7 +430,7 @@ def tag_album(items, search_artist=None, search_album=None, id_info = match_by_id(items) if id_info: _add_candidate(items, candidates, id_info) - rec = _recommendation(candidates.values()) + rec = _recommendation(list(candidates.values())) log.debug(u'Album ID match recommendation is {0}', rec) if candidates and not config['import']['timid']: # If we have a very good MBID match, return immediately. @@ -420,7 +438,8 @@ def tag_album(items, search_artist=None, search_album=None, # matches. 
if rec == Recommendation.strong: log.debug(u'ID match.') - return cur_artist, cur_album, candidates.values(), rec + return cur_artist, cur_album, \ + Proposal(list(candidates.values()), rec) # Search terms. if not (search_artist and search_album): @@ -435,24 +454,25 @@ def tag_album(items, search_artist=None, search_album=None, log.debug(u'Album might be VA: {0}', va_likely) # Get the results from the data sources. - search_cands = hooks.album_candidates(items, search_artist, - search_album, va_likely) - - log.debug(u'Evaluating {0} candidates.', len(search_cands)) - for info in search_cands: - _add_candidate(items, candidates, info) + for matched_candidate in hooks.album_candidates(items, + search_artist, + search_album, + va_likely): + _add_candidate(items, candidates, matched_candidate) + log.debug(u'Evaluating {0} candidates.', len(candidates)) # Sort and get the recommendation. - candidates = sorted(candidates.itervalues()) + candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) - return cur_artist, cur_album, candidates, rec + return cur_artist, cur_album, Proposal(candidates, rec) def tag_item(item, search_artist=None, search_title=None, search_ids=[]): - """Attempts to find metadata for a single track. Returns a - `(candidates, recommendation)` pair where `candidates` is a list of - TrackMatch objects. `search_artist` and `search_title` may be used + """Find metadata for a single track. Return a `Proposal` consisting + of `TrackMatch` objects. + + `search_artist` and `search_title` may be used to override the current metadata for the purposes of the MusicBrainz title. `search_ids` may be used for restricting the search to a list of metadata backend IDs. @@ -462,7 +482,7 @@ def tag_item(item, search_artist=None, search_title=None, candidates = {} # First, try matching by MusicBrainz ID. 
- trackids = search_ids or filter(None, [item.mb_trackid]) + trackids = search_ids or [t for t in [item.mb_trackid] if t] if trackids: for trackid in trackids: log.debug(u'Searching for track ID: {0}', trackid) @@ -471,18 +491,18 @@ def tag_item(item, search_artist=None, search_title=None, candidates[track_info.track_id] = \ hooks.TrackMatch(dist, track_info) # If this is a good match, then don't keep searching. - rec = _recommendation(sorted(candidates.itervalues())) + rec = _recommendation(_sort_candidates(candidates.values())) if rec == Recommendation.strong and \ not config['import']['timid']: log.debug(u'Track ID match.') - return sorted(candidates.itervalues()), rec + return Proposal(_sort_candidates(candidates.values()), rec) # If we're searching by ID, don't proceed. if search_ids: if candidates: - return sorted(candidates.itervalues()), rec + return Proposal(_sort_candidates(candidates.values()), rec) else: - return [], Recommendation.none + return Proposal([], Recommendation.none) # Search terms. if not (search_artist and search_title): @@ -496,6 +516,6 @@ def tag_item(item, search_artist=None, search_title=None, # Sort by distance and return with recommendation. 
log.debug(u'Found {0} candidates.', len(candidates)) - candidates = sorted(candidates.itervalues()) + candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) - return candidates, rec + return Proposal(candidates, rec) diff --git a/libs/beets/autotag/mb.py b/libs/beets/autotag/mb.py index e64da8d5..2b28a5cc 100644 --- a/libs/beets/autotag/mb.py +++ b/libs/beets/autotag/mb.py @@ -20,16 +20,23 @@ from __future__ import division, absolute_import, print_function import musicbrainzngs import re import traceback -from urlparse import urljoin +from six.moves.urllib.parse import urljoin from beets import logging import beets.autotag.hooks import beets from beets import util from beets import config +import six VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -BASE_URL = 'http://musicbrainz.org/' + +if util.SNI_SUPPORTED: + BASE_URL = 'https://musicbrainz.org/' +else: + BASE_URL = 'http://musicbrainz.org/' + +SKIPPED_TRACKS = ['[data track]'] musicbrainzngs.set_useragent('beets', beets.__version__, 'http://beets.io/') @@ -53,8 +60,12 @@ class MusicBrainzAPIError(util.HumanReadableException): log = logging.getLogger('beets') RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', - 'labels', 'artist-credits', 'aliases'] + 'labels', 'artist-credits', 'aliases', + 'recording-level-rels', 'work-rels', + 'work-level-rels', 'artist-rels'] TRACK_INCLUDES = ['artists', 'aliases'] +if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: + TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] def track_url(trackid): @@ -69,7 +80,8 @@ def configure(): """Set up the python-musicbrainz-ngs module according to settings from the beets configuration. This should be called at startup. 
""" - musicbrainzngs.set_hostname(config['musicbrainz']['host'].get(unicode)) + hostname = config['musicbrainz']['host'].as_str() + musicbrainzngs.set_hostname(hostname) musicbrainzngs.set_rate_limit( config['musicbrainz']['ratelimit_interval'].as_number(), config['musicbrainz']['ratelimit'].get(int), @@ -99,6 +111,24 @@ def _preferred_alias(aliases): return matches[0] +def _preferred_release_event(release): + """Given a release, select and return the user's preferred release + event as a tuple of (country, release_date). Fall back to the + default release event if a preferred event is not found. + """ + countries = config['match']['preferred']['countries'].as_str_seq() + + for country in countries: + for event in release.get('release-event-list', {}): + try: + if country in event['area']['iso-3166-1-code-list']: + return country, event['date'] + except KeyError: + pass + + return release.get('country'), release.get('date') + + def _flatten_artist_credit(credit): """Given a list representing an ``artist-credit`` block, flatten the data into a triple of joined artist name strings: canonical, sort, and @@ -108,7 +138,7 @@ def _flatten_artist_credit(credit): artist_sort_parts = [] artist_credit_parts = [] for el in credit: - if isinstance(el, basestring): + if isinstance(el, six.string_types): # Join phrase. 
artist_parts.append(el) artist_credit_parts.append(el) @@ -177,6 +207,37 @@ def track_info(recording, index=None, medium=None, medium_index=None, if recording.get('length'): info.length = int(recording['length']) / (1000.0) + lyricist = [] + composer = [] + composer_sort = [] + for work_relation in recording.get('work-relation-list', ()): + if work_relation['type'] != 'performance': + continue + for artist_relation in work_relation['work'].get( + 'artist-relation-list', ()): + if 'type' in artist_relation: + type = artist_relation['type'] + if type == 'lyricist': + lyricist.append(artist_relation['artist']['name']) + elif type == 'composer': + composer.append(artist_relation['artist']['name']) + composer_sort.append( + artist_relation['artist']['sort-name']) + if lyricist: + info.lyricist = u', '.join(lyricist) + if composer: + info.composer = u', '.join(composer) + info.composer_sort = u', '.join(composer_sort) + + arranger = [] + for artist_relation in recording.get('artist-relation-list', ()): + if 'type' in artist_relation: + type = artist_relation['type'] + if type == 'arranger': + arranger.append(artist_relation['artist']['name']) + if arranger: + info.arranger = u', '.join(arranger) + info.decode() return info @@ -216,11 +277,28 @@ def album_info(release): disctitle = medium.get('title') format = medium.get('format') + if format in config['match']['ignored_media'].as_str_seq(): + continue + all_tracks = medium['track-list'] + if 'data-track-list' in medium: + all_tracks += medium['data-track-list'] + track_count = len(all_tracks) + if 'pregap' in medium: all_tracks.insert(0, medium['pregap']) for track in all_tracks: + + if ('title' in track['recording'] and + track['recording']['title'] in SKIPPED_TRACKS): + continue + + if ('video' in track['recording'] and + track['recording']['video'] == 'true' and + config['match']['ignore_video_tracks']): + continue + # Basic information from the recording. 
index += 1 ti = track_info( @@ -228,10 +306,12 @@ def album_info(release): index, int(medium['position']), int(track['position']), - len(medium['track-list']), + track_count, ) + ti.release_track_id = track['id'] ti.disctitle = disctitle ti.media = format + ti.track_alt = track['number'] # Prefer track data, where present, over recording data. if track.get('title'): @@ -260,10 +340,9 @@ def album_info(release): ) info.va = info.artist_id == VARIOUS_ARTISTS_ID if info.va: - info.artist = config['va_name'].get(unicode) + info.artist = config['va_name'].as_str() info.asin = release.get('asin') info.releasegroup_id = release['release-group']['id'] - info.country = release.get('country') info.albumstatus = release.get('status') # Build up the disambiguation string from the release group and release. @@ -274,14 +353,28 @@ def album_info(release): disambig.append(release.get('disambiguation')) info.albumdisambig = u', '.join(disambig) - # Release type not always populated. + # Get the "classic" Release type. This data comes from a legacy API + # feature before MusicBrainz supported multiple release types. if 'type' in release['release-group']: reltype = release['release-group']['type'] if reltype: info.albumtype = reltype.lower() - # Release dates. - release_date = release.get('date') + # Log the new-style "primary" and "secondary" release types. + # Eventually, we'd like to actually store this data, but we just log + # it for now to help understand the differences. + if 'primary-type' in release['release-group']: + rel_primarytype = release['release-group']['primary-type'] + if rel_primarytype: + log.debug('primary MB release type: ' + rel_primarytype.lower()) + if 'secondary-type-list' in release['release-group']: + if release['release-group']['secondary-type-list']: + log.debug('secondary MB release type(s): ' + ', '.join( + [secondarytype.lower() for secondarytype in + release['release-group']['secondary-type-list']])) + + # Release events. 
+ info.country, release_date = _preferred_release_event(release) release_group_date = release['release-group'].get('first-release-date') if not release_date: # Fall back if release-specific date is not available. @@ -329,13 +422,14 @@ def match_album(artist, album, tracks=None): # Various Artists search. criteria['arid'] = VARIOUS_ARTISTS_ID if tracks is not None: - criteria['tracks'] = unicode(tracks) + criteria['tracks'] = six.text_type(tracks) # Abort if we have no search terms. - if not any(criteria.itervalues()): + if not any(criteria.values()): return try: + log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria) res = musicbrainzngs.search_releases( limit=config['musicbrainz']['searchlimit'].get(int), **criteria) except musicbrainzngs.MusicBrainzError as exc: @@ -358,7 +452,7 @@ def match_track(artist, title): 'recording': title.lower().strip(), } - if not any(criteria.itervalues()): + if not any(criteria.values()): return try: @@ -376,7 +470,7 @@ def _parse_id(s): no ID can be found, return None. """ # Find the first thing that looks like a UUID/MBID. - match = re.search(ur'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) + match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) if match: return match.group() @@ -386,6 +480,7 @@ def album_for_id(releaseid): object or None if the album is not found. May raise a MusicBrainzAPIError. 
""" + log.debug(u'Requesting MusicBrainz release {}', releaseid) albumid = _parse_id(releaseid) if not albumid: log.debug(u'Invalid MBID ({0}).', releaseid) diff --git a/libs/beets/config_default.yaml b/libs/beets/config_default.yaml index 4c12c3df..273f9423 100644 --- a/libs/beets/config_default.yaml +++ b/libs/beets/config_default.yaml @@ -6,9 +6,12 @@ import: copy: yes move: no link: no + hardlink: no delete: no resume: ask incremental: no + incremental_skip_later: no + from_scratch: no quiet_fallback: skip none_rec_action: ask timid: no @@ -23,6 +26,9 @@ import: group_albums: no pretend: false search_ids: [] + duplicate_action: ask + bell: no + set_fields: {} clutter: ["Thumbs.DB", ".DS_Store"] ignore: [".*", "*~", "System Volume Information", "lost+found"] @@ -36,6 +42,7 @@ replace: '\.$': _ '\s+$': '' '^\s+': '' + '^-': _ path_sep_replace: _ asciify_paths: false art_filename: cover @@ -49,6 +56,7 @@ per_disc_numbering: no verbose: 0 terminal_encoding: original_date: no +artist_credit: no id3v23: no va_name: "Various Artists" @@ -120,5 +128,7 @@ match: original_year: no ignored: [] required: [] + ignored_media: [] + ignore_video_tracks: yes track_length_grace: 10 track_length_max: 30 diff --git a/libs/beets/dbcore/db.py b/libs/beets/dbcore/db.py index 3f701be5..0f4dc151 100644 --- a/libs/beets/dbcore/db.py +++ b/libs/beets/dbcore/db.py @@ -27,8 +27,19 @@ import collections import beets from beets.util.functemplate import Template +from beets.util import py3_path from beets.dbcore import types from .query import MatchQuery, NullSort, TrueQuery +import six + + +class DBAccessError(Exception): + """The SQLite database became inaccessible. + + This can happen when trying to read or write the database when, for + example, the database file is deleted or otherwise disappears. There + is probably no way to recover from this error. 
+ """ class FormattedMapping(collections.Mapping): @@ -66,10 +77,10 @@ class FormattedMapping(collections.Mapping): def _get_formatted(self, model, key): value = model._type(key).format(model.get(key)) if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') if self.for_path: - sep_repl = beets.config['path_sep_replace'].get(unicode) + sep_repl = beets.config['path_sep_replace'].as_str() for sep in (os.path.sep, os.path.altsep): if sep: value = value.replace(sep, sep_repl) @@ -176,9 +187,9 @@ class Model(object): ordinary construction are bypassed. """ obj = cls(db) - for key, value in fixed_values.iteritems(): + for key, value in fixed_values.items(): obj._values_fixed[key] = cls._type(key).from_sql(value) - for key, value in flex_values.iteritems(): + for key, value in flex_values.items(): obj._values_flex[key] = cls._type(key).from_sql(value) return obj @@ -206,6 +217,21 @@ class Model(object): if need_id and not self.id: raise ValueError(u'{0} has no id'.format(type(self).__name__)) + def copy(self): + """Create a copy of the model object. + + The field values and other state is duplicated, but the new copy + remains associated with the same database as the old object. + (A simple `copy.deepcopy` will not work because it would try to + duplicate the SQLite connection.) + """ + new = self.__class__() + new._db = self._db + new._values_fixed = self._values_fixed.copy() + new._values_flex = self._values_flex.copy() + new._dirty = self._dirty.copy() + return new + # Essential field accessors. @classmethod @@ -225,14 +251,15 @@ class Model(object): if key in getters: # Computed. return getters[key](self) elif key in self._fields: # Fixed. - return self._values_fixed.get(key) + return self._values_fixed.get(key, self._type(key).null) elif key in self._values_flex: # Flexible. return self._values_flex[key] else: raise KeyError(key) - def __setitem__(self, key, value): - """Assign the value for a field. 
+ def _setitem(self, key, value): + """Assign the value for a field, return whether new and old value + differ. """ # Choose where to place the value. if key in self._fields: @@ -246,9 +273,17 @@ class Model(object): # Assign value and possibly mark as dirty. old_value = source.get(key) source[key] = value - if self._always_dirty or old_value != value: + changed = old_value != value + if self._always_dirty or changed: self._dirty.add(key) + return changed + + def __setitem__(self, key, value): + """Assign the value for a field. + """ + self._setitem(key, value) + def __delitem__(self, key): """Remove a flexible attribute from the model. """ @@ -267,9 +302,9 @@ class Model(object): `computed` parameter controls whether computed (plugin-provided) fields are included in the key list. """ - base_keys = list(self._fields) + self._values_flex.keys() + base_keys = list(self._fields) + list(self._values_flex.keys()) if computed: - return base_keys + self._getters().keys() + return base_keys + list(self._getters().keys()) else: return base_keys @@ -278,7 +313,7 @@ class Model(object): """Get a list of available keys for objects of this type. Includes fixed and computed fields. """ - return list(cls._fields) + cls._getters().keys() + return list(cls._fields) + list(cls._getters().keys()) # Act like a dictionary. @@ -340,15 +375,19 @@ class Model(object): # Database interaction (CRUD methods). - def store(self): + def store(self, fields=None): """Save the object's metadata into the library database. + :param fields: the fields to be stored. If not specified, all fields + will be. """ + if fields is None: + fields = self._fields self._check_db() # Build assignments for query. assignments = [] subvars = [] - for key in self._fields: + for key in fields: if key != 'id' and key in self._dirty: self._dirty.remove(key) assignments.append(key + '=?') @@ -452,7 +491,7 @@ class Model(object): separators will be added to the template. """ # Perform substitution. 
- if isinstance(template, basestring): + if isinstance(template, six.string_types): template = Template(template) return template.substitute(self.formatted(for_path), self._template_funcs()) @@ -463,7 +502,7 @@ class Model(object): def _parse(cls, key, string): """Parse a string as a value for the given key. """ - if not isinstance(string, basestring): + if not isinstance(string, six.string_types): raise TypeError(u"_parse() argument must be a string") return cls._type(key).parse(string) @@ -593,6 +632,11 @@ class Results(object): return self._row_count def __nonzero__(self): + """Does this result contain any objects? + """ + return self.__bool__() + + def __bool__(self): """Does this result contain any objects? """ return bool(len(self)) @@ -669,8 +713,18 @@ class Transaction(object): """Execute an SQL statement with substitution values and return the row ID of the last affected row. """ - cursor = self.db._connection().execute(statement, subvals) - return cursor.lastrowid + try: + cursor = self.db._connection().execute(statement, subvals) + return cursor.lastrowid + except sqlite3.OperationalError as e: + # In two specific cases, SQLite reports an error while accessing + # the underlying database file. We surface these exceptions as + # DBAccessError so the application can abort. + if e.args[0] in ("attempt to write a readonly database", + "unable to open database file"): + raise DBAccessError(e.args[0]) + else: + raise def script(self, statements): """Execute a string containing multiple SQL statements.""" @@ -685,8 +739,9 @@ class Database(object): """The Model subclasses representing tables in this database. """ - def __init__(self, path): + def __init__(self, path, timeout=5.0): self.path = path + self.timeout = timeout self._connections = {} self._tx_stacks = defaultdict(list) @@ -721,18 +776,36 @@ class Database(object): if thread_id in self._connections: return self._connections[thread_id] else: - # Make a new connection. 
- conn = sqlite3.connect( - self.path, - timeout=beets.config['timeout'].as_number(), - ) - - # Access SELECT results like dictionaries. - conn.row_factory = sqlite3.Row - + conn = self._create_connection() self._connections[thread_id] = conn return conn + def _create_connection(self): + """Create a SQLite connection to the underlying database. + + Makes a new connection every time. If you need to configure the + connection settings (e.g., add custom functions), override this + method. + """ + # Make a new connection. The `sqlite3` module can't use + # bytestring paths here on Python 3, so we need to + # provide a `str` using `py3_path`. + conn = sqlite3.connect( + py3_path(self.path), timeout=self.timeout + ) + + # Access SELECT results like dictionaries. + conn.row_factory = sqlite3.Row + return conn + + def _close(self): + """Close the all connections to the underlying SQLite database + from all threads. This does not render the database object + unusable; new connections can still be opened on demand. + """ + with self._shared_map_lock: + self._connections.clear() + @contextlib.contextmanager def _tx_stack(self): """A context manager providing access to the current thread's diff --git a/libs/beets/dbcore/query.py b/libs/beets/dbcore/query.py index caf38026..8fb64e20 100644 --- a/libs/beets/dbcore/query.py +++ b/libs/beets/dbcore/query.py @@ -23,6 +23,10 @@ from beets import util from datetime import datetime, timedelta import unicodedata from functools import reduce +import six + +if not six.PY2: + buffer = memoryview # sqlite won't accept memoryview in python 2 class ParsingError(ValueError): @@ -36,6 +40,7 @@ class InvalidQueryError(ParsingError): The query should be a unicode string or a list, which will be space-joined. 
""" + def __init__(self, query, explanation): if isinstance(query, list): query = " ".join(query) @@ -43,22 +48,24 @@ class InvalidQueryError(ParsingError): super(InvalidQueryError, self).__init__(message) -class InvalidQueryArgumentTypeError(ParsingError): +class InvalidQueryArgumentValueError(ParsingError): """Represent a query argument that could not be converted as expected. It exists to be caught in upper stack levels so a meaningful (i.e. with the query) InvalidQueryError can be raised. """ + def __init__(self, what, expected, detail=None): message = u"'{0}' is not {1}".format(what, expected) if detail: message = u"{0}: {1}".format(message, detail) - super(InvalidQueryArgumentTypeError, self).__init__(message) + super(InvalidQueryArgumentValueError, self).__init__(message) class Query(object): """An abstract class representing a query into the item database. """ + def clause(self): """Generate an SQLite expression implementing the query. @@ -91,6 +98,7 @@ class FieldQuery(Query): string. Subclasses may also provide `col_clause` to implement the same matching functionality in SQLite. """ + def __init__(self, field, pattern, fast=True): self.field = field self.pattern = pattern @@ -130,6 +138,7 @@ class FieldQuery(Query): class MatchQuery(FieldQuery): """A query that looks for exact matches in an item field.""" + def col_clause(self): return self.field + " = ?", [self.pattern] @@ -139,6 +148,7 @@ class MatchQuery(FieldQuery): class NoneQuery(FieldQuery): + """A query that checks whether a field is null.""" def __init__(self, field, fast=True): super(NoneQuery, self).__init__(field, None, fast) @@ -161,6 +171,7 @@ class StringFieldQuery(FieldQuery): """A FieldQuery that converts values to strings before matching them. """ + @classmethod def value_match(cls, pattern, value): """Determine whether the value matches the pattern. 
The value @@ -178,11 +189,12 @@ class StringFieldQuery(FieldQuery): class SubstringQuery(StringFieldQuery): """A query that matches a substring in a specific item field.""" + def col_clause(self): pattern = (self.pattern - .replace('\\', '\\\\') - .replace('%', '\\%') - .replace('_', '\\_')) + .replace('\\', '\\\\') + .replace('%', '\\%') + .replace('_', '\\_')) search = '%' + pattern + '%' clause = self.field + " like ? escape '\\'" subvals = [search] @@ -200,6 +212,7 @@ class RegexpQuery(StringFieldQuery): Raises InvalidQueryError when the pattern is not a valid regular expression. """ + def __init__(self, field, pattern, fast=True): super(RegexpQuery, self).__init__(field, pattern, fast) pattern = self._normalize(pattern) @@ -207,9 +220,9 @@ class RegexpQuery(StringFieldQuery): self.pattern = re.compile(self.pattern) except re.error as exc: # Invalid regular expression. - raise InvalidQueryArgumentTypeError(pattern, - u"a regular expression", - format(exc)) + raise InvalidQueryArgumentValueError(pattern, + u"a regular expression", + format(exc)) @staticmethod def _normalize(s): @@ -227,9 +240,10 @@ class BooleanQuery(MatchQuery): """Matches a boolean field. Pattern should either be a boolean or a string reflecting a boolean. """ + def __init__(self, field, pattern, fast=True): super(BooleanQuery, self).__init__(field, pattern, fast) - if isinstance(pattern, basestring): + if isinstance(pattern, six.string_types): self.pattern = util.str2bool(pattern) self.pattern = int(self.pattern) @@ -240,17 +254,16 @@ class BytesQuery(MatchQuery): `unicode` equivalently in Python 2. Always use this query instead of `MatchQuery` when matching on BLOB values. """ + def __init__(self, field, pattern): super(BytesQuery, self).__init__(field, pattern) - # Use a buffer representation of the pattern for SQLite + # Use a buffer/memoryview representation of the pattern for SQLite # matching. This instructs SQLite to treat the blob as binary # rather than encoded Unicode. 
- if isinstance(self.pattern, basestring): - # Implicitly coerce Unicode strings to their bytes - # equivalents. - if isinstance(self.pattern, unicode): - self.pattern = self.pattern.encode('utf8') + if isinstance(self.pattern, (six.text_type, bytes)): + if isinstance(self.pattern, six.text_type): + self.pattern = self.pattern.encode('utf-8') self.buf_pattern = buffer(self.pattern) elif isinstance(self.pattern, buffer): self.buf_pattern = self.pattern @@ -268,6 +281,7 @@ class NumericQuery(FieldQuery): Raises InvalidQueryError when the pattern does not represent an int or a float. """ + def _convert(self, s): """Convert a string to a numeric type (float or int). @@ -283,7 +297,7 @@ class NumericQuery(FieldQuery): try: return float(s) except ValueError: - raise InvalidQueryArgumentTypeError(s, u"an int or a float") + raise InvalidQueryArgumentValueError(s, u"an int or a float") def __init__(self, field, pattern, fast=True): super(NumericQuery, self).__init__(field, pattern, fast) @@ -304,7 +318,7 @@ class NumericQuery(FieldQuery): if self.field not in item: return False value = item[self.field] - if isinstance(value, basestring): + if isinstance(value, six.string_types): value = self._convert(value) if self.point is not None: @@ -335,6 +349,7 @@ class CollectionQuery(Query): """An abstract query class that aggregates other queries. Can be indexed like a list to access the sub-queries. """ + def __init__(self, subqueries=()): self.subqueries = subqueries @@ -387,6 +402,7 @@ class AnyFieldQuery(CollectionQuery): any field. The individual field query class is provided to the constructor. """ + def __init__(self, pattern, fields, cls): self.pattern = pattern self.fields = fields @@ -422,6 +438,7 @@ class MutableCollectionQuery(CollectionQuery): """A collection query whose subqueries may be modified after the query is initialized. 
""" + def __setitem__(self, key, value): self.subqueries[key] = value @@ -431,6 +448,7 @@ class MutableCollectionQuery(CollectionQuery): class AndQuery(MutableCollectionQuery): """A conjunction of a list of other queries.""" + def clause(self): return self.clause_with_joiner('and') @@ -440,6 +458,7 @@ class AndQuery(MutableCollectionQuery): class OrQuery(MutableCollectionQuery): """A conjunction of a list of other queries.""" + def clause(self): return self.clause_with_joiner('or') @@ -451,6 +470,7 @@ class NotQuery(Query): """A query that matches the negation of its `subquery`, as a shorcut for performing `not(subquery)` without using regular expressions. """ + def __init__(self, subquery): self.subquery = subquery @@ -479,6 +499,7 @@ class NotQuery(Query): class TrueQuery(Query): """A query that always matches.""" + def clause(self): return '1', () @@ -488,6 +509,7 @@ class TrueQuery(Query): class FalseQuery(Query): """A query that never matches.""" + def clause(self): return '0', () @@ -501,9 +523,13 @@ def _to_epoch_time(date): """Convert a `datetime` object to an integer number of seconds since the (local) Unix epoch. """ - epoch = datetime.fromtimestamp(0) - delta = date - epoch - return int(delta.total_seconds()) + if hasattr(date, 'timestamp'): + # The `timestamp` method exists on Python 3.3+. + return int(date.timestamp()) + else: + epoch = datetime.fromtimestamp(0) + delta = date - epoch + return int(delta.total_seconds()) def _parse_periods(pattern): @@ -527,12 +553,23 @@ class Period(object): instants of time during January 2014. 
""" - precisions = ('year', 'month', 'day') - date_formats = ('%Y', '%Y-%m', '%Y-%m-%d') + precisions = ('year', 'month', 'day', 'hour', 'minute', 'second') + date_formats = ( + ('%Y',), # year + ('%Y-%m',), # month + ('%Y-%m-%d',), # day + ('%Y-%m-%dT%H', '%Y-%m-%d %H'), # hour + ('%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M'), # minute + ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S') # second + ) + relative_units = {'y': 365, 'm': 30, 'w': 7, 'd': 1} + relative_re = '(?P[+|-]?)(?P[0-9]+)' + \ + '(?P[y|m|w|d])' def __init__(self, date, precision): """Create a period with the given date (a `datetime` object) and - precision (a string, one of "year", "month", or "day"). + precision (a string, one of "year", "month", "day", "hour", "minute", + or "second"). """ if precision not in Period.precisions: raise ValueError(u'Invalid precision {0}'.format(precision)) @@ -542,20 +579,55 @@ class Period(object): @classmethod def parse(cls, string): """Parse a date and return a `Period` object or `None` if the - string is empty. + string is empty, or raise an InvalidQueryArgumentValueError if + the string cannot be parsed to a date. + + The date may be absolute or relative. Absolute dates look like + `YYYY`, or `YYYY-MM-DD`, or `YYYY-MM-DD HH:MM:SS`, etc. Relative + dates have three parts: + + - Optionally, a ``+`` or ``-`` sign indicating the future or the + past. The default is the future. + - A number: how much to add or subtract. + - A letter indicating the unit: days, weeks, months or years + (``d``, ``w``, ``m`` or ``y``). A "month" is exactly 30 days + and a "year" is exactly 365 days. """ + + def find_date_and_format(string): + for ord, format in enumerate(cls.date_formats): + for format_option in format: + try: + date = datetime.strptime(string, format_option) + return date, ord + except ValueError: + # Parsing failed. + pass + return (None, None) + if not string: return None - ordinal = string.count('-') - if ordinal >= len(cls.date_formats): - # Too many components. 
- return None - date_format = cls.date_formats[ordinal] - try: - date = datetime.strptime(string, date_format) - except ValueError: - # Parsing failed. - return None + + # Check for a relative date. + match_dq = re.match(cls.relative_re, string) + if match_dq: + sign = match_dq.group('sign') + quantity = match_dq.group('quantity') + timespan = match_dq.group('timespan') + + # Add or subtract the given amount of time from the current + # date. + multiplier = -1 if sign == '-' else 1 + days = cls.relative_units[timespan] + date = datetime.now() + \ + timedelta(days=int(quantity) * days) * multiplier + return cls(date, cls.precisions[5]) + + # Check for an absolute date. + date, ordinal = find_date_and_format(string) + if date is None: + raise InvalidQueryArgumentValueError(string, + 'a valid date/time string') precision = cls.precisions[ordinal] return cls(date, precision) @@ -574,6 +646,12 @@ class Period(object): return date.replace(year=date.year + 1, month=1) elif 'day' == precision: return date + timedelta(days=1) + elif 'hour' == precision: + return date + timedelta(hours=1) + elif 'minute' == precision: + return date + timedelta(minutes=1) + elif 'second' == precision: + return date + timedelta(seconds=1) else: raise ValueError(u'unhandled precision {0}'.format(precision)) @@ -620,14 +698,17 @@ class DateQuery(FieldQuery): The value of a date field can be matched against a date interval by using an ellipsis interval syntax similar to that of NumericQuery. """ + def __init__(self, field, pattern, fast=True): super(DateQuery, self).__init__(field, pattern, fast) start, end = _parse_periods(pattern) self.interval = DateInterval.from_periods(start, end) def match(self, item): + if self.field not in item: + return False timestamp = float(item[self.field]) - date = datetime.utcfromtimestamp(timestamp) + date = datetime.fromtimestamp(timestamp) return self.interval.contains(date) _clause_tmpl = "{0} {1} ?" 
@@ -661,6 +742,7 @@ class DurationQuery(NumericQuery): Raises InvalidQueryError when the pattern does not represent an int, float or M:SS time interval. """ + def _convert(self, s): """Convert a M:SS or numeric string to a float. @@ -675,7 +757,7 @@ class DurationQuery(NumericQuery): try: return float(s) except ValueError: - raise InvalidQueryArgumentTypeError( + raise InvalidQueryArgumentValueError( s, u"a M:SS string or a float") @@ -783,6 +865,7 @@ class FieldSort(Sort): """An abstract sort criterion that orders by a specific field (of any kind). """ + def __init__(self, field, ascending=True, case_insensitive=True): self.field = field self.ascending = ascending @@ -795,7 +878,7 @@ class FieldSort(Sort): def key(item): field_val = item.get(self.field, '') - if self.case_insensitive and isinstance(field_val, unicode): + if self.case_insensitive and isinstance(field_val, six.text_type): field_val = field_val.lower() return field_val @@ -820,6 +903,7 @@ class FieldSort(Sort): class FixedFieldSort(FieldSort): """Sort object to sort on a fixed field. """ + def order_clause(self): order = "ASC" if self.ascending else "DESC" if self.case_insensitive: @@ -836,12 +920,14 @@ class SlowFieldSort(FieldSort): """A sort criterion by some model field other than a fixed field: i.e., a computed or flexible field. """ + def is_slow(self): return True class NullSort(Sort): """No sorting. Leave results unsorted.""" + def sort(self, items): return items diff --git a/libs/beets/dbcore/types.py b/libs/beets/dbcore/types.py index 2726969d..b909904b 100644 --- a/libs/beets/dbcore/types.py +++ b/libs/beets/dbcore/types.py @@ -19,6 +19,10 @@ from __future__ import division, absolute_import, print_function from . import query from beets.util import str2bool +import six + +if not six.PY2: + buffer = memoryview # sqlite won't accept memoryview in python 2 # Abstract base. @@ -37,7 +41,7 @@ class Type(object): """The `Query` subclass to be used when querying the field. 
""" - model_type = unicode + model_type = six.text_type """The Python type that is used to represent the value in the model. The model is guaranteed to return a value of this type if the field @@ -61,9 +65,9 @@ class Type(object): if value is None: value = u'' if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') - return unicode(value) + return six.text_type(value) def parse(self, string): """Parse a (possibly human-written) string and return the @@ -97,12 +101,12 @@ class Type(object): https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types Flexible fields have the type affinity `TEXT`. This means the - `sql_value` is either a `buffer` or a `unicode` object` and the - method must handle these in addition. + `sql_value` is either a `buffer`/`memoryview` or a `unicode` object` + and the method must handle these in addition. """ if isinstance(sql_value, buffer): - sql_value = bytes(sql_value).decode('utf8', 'ignore') - if isinstance(sql_value, unicode): + sql_value = bytes(sql_value).decode('utf-8', 'ignore') + if isinstance(sql_value, six.text_type): return self.parse(sql_value) else: return self.normalize(sql_value) @@ -194,7 +198,7 @@ class Boolean(Type): model_type = bool def format(self, value): - return unicode(bool(value)) + return six.text_type(bool(value)) def parse(self, string): return str2bool(string) diff --git a/libs/beets/importer.py b/libs/beets/importer.py index bfaa21a0..4e4084ee 100644 --- a/libs/beets/importer.py +++ b/libs/beets/importer.py @@ -37,14 +37,13 @@ from beets import dbcore from beets import plugins from beets import util from beets import config -from beets.util import pipeline, sorted_walk, ancestry +from beets.util import pipeline, sorted_walk, ancestry, MoveOperation from beets.util import syspath, normpath, displayable_path from enum import Enum from beets import mediafile action = Enum('action', - ['SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID', - 
'ALBUMS', 'RETAG']) + ['SKIP', 'ASIS', 'TRACKS', 'APPLY', 'ALBUMS', 'RETAG']) # The RETAG action represents "don't apply any match, but do record # new metadata". It's not reachable via the standard command prompt but # can be used by plugins. @@ -69,7 +68,7 @@ class ImportAbort(Exception): def _open_state(): """Reads the state file, returning a dictionary.""" try: - with open(config['statefile'].as_filename()) as f: + with open(config['statefile'].as_filename(), 'rb') as f: return pickle.load(f) except Exception as exc: # The `pickle` module can emit all sorts of exceptions during @@ -83,7 +82,7 @@ def _open_state(): def _save_state(state): """Writes the state dictionary out to disk.""" try: - with open(config['statefile'].as_filename(), 'w') as f: + with open(config['statefile'].as_filename(), 'wb') as f: pickle.dump(state, f) except IOError as exc: log.error(u'state file could not be written: {0}', exc) @@ -189,6 +188,8 @@ class ImportSession(object): self.paths = paths self.query = query self._is_resuming = dict() + self._merged_items = set() + self._merged_dirs = set() # Normalize the paths. if self.paths: @@ -221,13 +222,19 @@ class ImportSession(object): iconfig['resume'] = False iconfig['incremental'] = False - # Copy, move, and link are mutually exclusive. + # Copy, move, link, and hardlink are mutually exclusive. if iconfig['move']: iconfig['copy'] = False iconfig['link'] = False + iconfig['hardlink'] = False elif iconfig['link']: iconfig['copy'] = False iconfig['move'] = False + iconfig['hardlink'] = False + elif iconfig['hardlink']: + iconfig['copy'] = False + iconfig['move'] = False + iconfig['link'] = False # Only delete when copying. if not iconfig['copy']: @@ -306,6 +313,8 @@ class ImportSession(object): stages += [import_asis(self)] # Plugin stages. 
+ for stage_func in plugins.early_import_stages(): + stages.append(plugin_stage(self, stage_func)) for stage_func in plugins.import_stages(): stages.append(plugin_stage(self, stage_func)) @@ -331,7 +340,7 @@ class ImportSession(object): been imported in a previous session. """ if self.is_resuming(toppath) \ - and all(map(lambda p: progress_element(toppath, p), paths)): + and all([progress_element(toppath, p) for p in paths]): return True if self.config['incremental'] \ and tuple(paths) in self.history_dirs: @@ -345,6 +354,24 @@ class ImportSession(object): self._history_dirs = history_get() return self._history_dirs + def already_merged(self, paths): + """Returns true if all the paths being imported were part of a merge + during previous tasks. + """ + for path in paths: + if path not in self._merged_items \ + and path not in self._merged_dirs: + return False + return True + + def mark_merged(self, paths): + """Mark paths and directories as merged for future reimport tasks. + """ + self._merged_items.update(paths) + dirs = set([os.path.dirname(path) if os.path.isfile(path) else path + for path in paths]) + self._merged_dirs.update(dirs) + def is_resuming(self, toppath): """Return `True` if user wants to resume import of this path. @@ -362,8 +389,8 @@ class ImportSession(object): # Either accept immediately or prompt for input to decide. if self.want_resume is True or \ self.should_resume(toppath): - log.warn(u'Resuming interrupted import of {0}', - util.displayable_path(toppath)) + log.warning(u'Resuming interrupted import of {0}', + util.displayable_path(toppath)) self._is_resuming[toppath] = True else: # Clear progress; we're starting from the top. @@ -414,7 +441,7 @@ class ImportTask(BaseImportTask): from the `candidates` list. * `find_duplicates()` Returns a list of albums from `lib` with the - same artist and album name as the task. + same artist and album name as the task. 
* `apply_metadata()` Sets the attributes of the items from the task's `match` attribute. @@ -424,6 +451,9 @@ class ImportTask(BaseImportTask): * `manipulate_files()` Copy, move, and write files depending on the session configuration. + * `set_fields()` Sets the fields given at CLI or configuration to + the specified values. + * `finalize()` Update the import progress and cleanup the file system. """ @@ -435,6 +465,7 @@ class ImportTask(BaseImportTask): self.candidates = [] self.rec = None self.should_remove_duplicates = False + self.should_merge_duplicates = False self.is_album = True self.search_ids = [] # user-supplied candidate IDs. @@ -443,7 +474,6 @@ class ImportTask(BaseImportTask): indicates that an action has been selected for this task. """ # Not part of the task structure: - assert choice not in (action.MANUAL, action.MANUAL_ID) assert choice != action.APPLY # Only used internally. if choice in (action.SKIP, action.ASIS, action.TRACKS, action.ALBUMS, action.RETAG): @@ -499,13 +529,17 @@ class ImportTask(BaseImportTask): if self.choice_flag in (action.ASIS, action.RETAG): return list(self.items) elif self.choice_flag == action.APPLY: - return self.match.mapping.keys() + return list(self.match.mapping.keys()) else: assert False def apply_metadata(self): """Copy metadata from match info to the items. """ + if config['import']['from_scratch']: + for item in self.match.mapping: + item.clear() + autotag.apply_metadata(self.match.info, self.match.mapping) def duplicate_items(self, lib): @@ -526,13 +560,29 @@ class ImportTask(BaseImportTask): util.prune_dirs(os.path.dirname(item.path), lib.directory) + def set_fields(self): + """Sets the fields given at CLI or configuration to the specified + values. 
+ """ + for field, view in config['import']['set_fields'].items(): + value = view.get() + log.debug(u'Set field {1}={2} for {0}', + displayable_path(self.paths), + field, + value) + self.album[field] = value + self.album.store() + def finalize(self, session): """Save progress, clean up files, and emit plugin event. """ # Update progress. if session.want_resume: self.save_progress() - if session.config['incremental']: + if session.config['incremental'] and not ( + # Should we skip recording to incremental list? + self.skip and session.config['incremental_skip_later'] + ): self.save_history() self.cleanup(copy=session.config['copy'], @@ -587,12 +637,12 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, candidates, recommendation = \ + artist, album, prop = \ autotag.tag_album(self.items, search_ids=self.search_ids) self.cur_artist = artist self.cur_album = album - self.candidates = candidates - self.rec = recommendation + self.candidates = prop.candidates + self.rec = prop.recommendation def find_duplicates(self, lib): """Return a list of albums from `lib` with the same artist and @@ -612,10 +662,11 @@ class ImportTask(BaseImportTask): )) for album in lib.albums(duplicate_query): - # Check whether the album is identical in contents, in which - # case it is not a duplicate (will be replaced). + # Check whether the album paths are all present in the task + # i.e. album is being completely re-imported by the task, + # in which case it is not a duplicate (will be replaced). album_paths = set(i.path for i in album.items()) - if album_paths != task_paths: + if not (album_paths <= task_paths): duplicates.append(album) return duplicates @@ -640,7 +691,7 @@ class ImportTask(BaseImportTask): changes['comp'] = False else: # VA. 
- changes['albumartist'] = config['va_name'].get(unicode) + changes['albumartist'] = config['va_name'].as_str() changes['comp'] = True elif self.choice_flag in (action.APPLY, action.RETAG): @@ -655,20 +706,28 @@ class ImportTask(BaseImportTask): for item in self.items: item.update(changes) - def manipulate_files(self, move=False, copy=False, write=False, - link=False, session=None): + def manipulate_files(self, operation=None, write=False, session=None): + """ Copy, move, link or hardlink (depending on `operation`) the files + as well as write metadata. + + `operation` should be an instance of `util.MoveOperation`. + + If `write` is `True` metadata is written to the files. + """ + items = self.imported_items() # Save the original paths of all items for deletion and pruning # in the next step (finalization). self.old_paths = [item.path for item in items] for item in items: - if move or copy or link: + if operation is not None: # In copy and link modes, treat re-imports specially: # move in-library files. (Out-of-library files are # copied/moved as usual). old_path = item.path - if (copy or link) and self.replaced_items[item] and \ - session.lib.directory in util.ancestry(old_path): + if (operation != MoveOperation.MOVE + and self.replaced_items[item] + and session.lib.directory in util.ancestry(old_path)): item.move() # We moved the item, so remove the # now-nonexistent file from old_paths. @@ -676,7 +735,7 @@ class ImportTask(BaseImportTask): else: # A normal import. Just copy files and keep track of # old paths. 
- item.move(copy, link) + item.move(operation) if write and (self.apply or self.choice_flag == action.RETAG): item.try_write() @@ -830,10 +889,9 @@ class SingletonImportTask(ImportTask): plugins.send('item_imported', lib=lib, item=item) def lookup_candidates(self): - candidates, recommendation = autotag.tag_item( - self.item, search_ids=self.search_ids) - self.candidates = candidates - self.rec = recommendation + prop = autotag.tag_item(self.item, search_ids=self.search_ids) + self.candidates = prop.candidates + self.rec = prop.recommendation def find_duplicates(self, lib): """Return a list of items from `lib` that have the same artist @@ -874,6 +932,19 @@ class SingletonImportTask(ImportTask): def reload(self): self.item.load() + def set_fields(self): + """Sets the fields given at CLI or configuration to the specified + values. + """ + for field, view in config['import']['set_fields'].items(): + value = view.get() + log.debug(u'Set field {1}={2} for {0}', + displayable_path(self.paths), + field, + value) + self.item[field] = value + self.item.store() + # FIXME The inheritance relationships are inverted. This is why there # are so many methods which pass. More responsibility should be delegated to @@ -944,7 +1015,7 @@ class ArchiveImportTask(SentinelImportTask): return False for path_test, _ in cls.handlers(): - if path_test(path): + if path_test(util.py3_path(path)): return True return False @@ -985,12 +1056,12 @@ class ArchiveImportTask(SentinelImportTask): `toppath` to that directory. 
""" for path_test, handler_class in self.handlers(): - if path_test(self.toppath): + if path_test(util.py3_path(self.toppath)): break try: extract_to = mkdtemp() - archive = handler_class(self.toppath, mode='r') + archive = handler_class(util.py3_path(self.toppath), mode='r') archive.extractall(extract_to) finally: archive.close() @@ -1148,8 +1219,8 @@ class ImportTaskFactory(object): if not (self.session.config['move'] or self.session.config['copy']): - log.warn(u"Archive importing requires either " - u"'copy' or 'move' to be enabled.") + log.warning(u"Archive importing requires either " + u"'copy' or 'move' to be enabled.") return log.debug(u'Extracting archive: {0}', @@ -1179,12 +1250,33 @@ class ImportTaskFactory(object): # Silently ignore non-music files. pass elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warn(u'unreadable file: {0}', displayable_path(path)) + log.warning(u'unreadable file: {0}', displayable_path(path)) else: log.error(u'error reading {0}: {1}', displayable_path(path), exc) +# Pipeline utilities + +def _freshen_items(items): + # Clear IDs from re-tagged items so they appear "fresh" when + # we add them back to the library. + for item in items: + item.id = None + item.album_id = None + + +def _extend_pipeline(tasks, *stages): + # Return pipeline extension for stages with list of tasks + if type(tasks) == list: + task_iter = iter(tasks) + else: + task_iter = tasks + + ipl = pipeline.Pipeline([task_iter] + list(stages)) + return pipeline.multiple(ipl.pull()) + + # Full-album pipeline stages. def read_tasks(session): @@ -1204,8 +1296,8 @@ def read_tasks(session): skipped += task_factory.skipped if not task_factory.imported: - log.warn(u'No files imported from {0}', - displayable_path(toppath)) + log.warning(u'No files imported from {0}', + displayable_path(toppath)) # Show skipped directories (due to incremental/resume). 
if skipped: @@ -1230,12 +1322,7 @@ def query_tasks(session): log.debug(u'yielding album {0}: {1} - {2}', album.id, album.albumartist, album.album) items = list(album.items()) - - # Clear IDs from re-tagged items so they appear "fresh" when - # we add them back to the library. - for item in items: - item.id = None - item.album_id = None + _freshen_items(items) task = ImportTask(None, [album.item_dir()], items) for task in task.handle_created(session): @@ -1281,6 +1368,9 @@ def user_query(session, task): if task.skip: return task + if session.already_merged(task.paths): + return pipeline.BUBBLE + # Ask the user for a choice. task.choose_match(session) plugins.send('import_task_choice', session=session, task=task) @@ -1295,24 +1385,38 @@ def user_query(session, task): yield new_task yield SentinelImportTask(task.toppath, task.paths) - ipl = pipeline.Pipeline([ - emitter(task), - lookup_candidates(session), - user_query(session), - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline(emitter(task), + lookup_candidates(session), + user_query(session)) # As albums: group items by albums and create task for each album if task.choice_flag is action.ALBUMS: - ipl = pipeline.Pipeline([ - iter([task]), - group_albums(session), - lookup_candidates(session), - user_query(session) - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline([task], + group_albums(session), + lookup_candidates(session), + user_query(session)) resolve_duplicates(session, task) + + if task.should_merge_duplicates: + # Create a new task for tagging the current items + # and duplicates together + duplicate_items = task.duplicate_items(session.lib) + + # Duplicates would be reimported so make them look "fresh" + _freshen_items(duplicate_items) + duplicate_paths = [item.path for item in duplicate_items] + + # Record merged paths in the session so they are not reimported + session.mark_merged(duplicate_paths) + + merged_task = ImportTask(None, task.paths + duplicate_paths, + 
task.items + duplicate_items) + + return _extend_pipeline([merged_task], + lookup_candidates(session), + user_query(session)) + apply_choice(session, task) return task @@ -1327,7 +1431,33 @@ def resolve_duplicates(session, task): log.debug(u'found duplicates: {}'.format( [o.id for o in found_duplicates] )) - session.resolve_duplicate(task, found_duplicates) + + # Get the default action to follow from config. + duplicate_action = config['import']['duplicate_action'].as_choice({ + u'skip': u's', + u'keep': u'k', + u'remove': u'r', + u'merge': u'm', + u'ask': u'a', + }) + log.debug(u'default action for duplicates: {0}', duplicate_action) + + if duplicate_action == u's': + # Skip new. + task.set_choice(action.SKIP) + elif duplicate_action == u'k': + # Keep both. Do nothing; leave the choice intact. + pass + elif duplicate_action == u'r': + # Remove old. + task.should_remove_duplicates = True + elif duplicate_action == u'm': + # Merge duplicates together + task.should_merge_duplicates = True + else: + # No default action set; ask the session. + session.resolve_duplicate(task, found_duplicates) + session.log_choice(task, True) @@ -1360,6 +1490,14 @@ def apply_choice(session, task): task.add(session.lib) + # If ``set_fields`` is set, set those fields to the + # configured values. + # NOTE: This cannot be done before the ``task.add()`` call above, + # because then the ``ImportTask`` won't have an `album` for which + # it can set the fields. 
+ if config['import']['set_fields']: + task.set_fields() + @pipeline.mutator_stage def plugin_stage(session, func, task): @@ -1388,11 +1526,20 @@ def manipulate_files(session, task): if task.should_remove_duplicates: task.remove_duplicates(session.lib) + if session.config['move']: + operation = MoveOperation.MOVE + elif session.config['copy']: + operation = MoveOperation.COPY + elif session.config['link']: + operation = MoveOperation.LINK + elif session.config['hardlink']: + operation = MoveOperation.HARDLINK + else: + operation = None + task.manipulate_files( - move=session.config['move'], - copy=session.config['copy'], + operation, write=session.config['write'], - link=session.config['link'], session=session, ) @@ -1439,8 +1586,16 @@ def group_albums(session): task = pipeline.multiple(tasks) -MULTIDISC_MARKERS = (r'dis[ck]', r'cd') -MULTIDISC_PAT_FMT = r'^(.*%s[\W_]*)\d' +MULTIDISC_MARKERS = (br'dis[ck]', br'cd') +MULTIDISC_PAT_FMT = br'^(.*%s[\W_]*)\d' + + +def is_subdir_of_any_in_list(path, dirs): + """Returns True if path os a subdirectory of any directory in dirs + (a list). In other case, returns False. + """ + ancestors = ancestry(path) + return any(d in ancestors for d in dirs) def albums_in_dir(path): @@ -1462,7 +1617,7 @@ def albums_in_dir(path): # and add the current directory. If so, just add the directory # and move on to the next directory. If not, stop collapsing. if collapse_paths: - if (not collapse_pat and collapse_paths[0] in ancestry(root)) or \ + if (is_subdir_of_any_in_list(root, collapse_paths)) or \ (collapse_pat and collapse_pat.match(os.path.basename(root))): # Still collapsing. @@ -1483,7 +1638,9 @@ def albums_in_dir(path): # named in this way. 
start_collapsing = False for marker in MULTIDISC_MARKERS: - marker_pat = re.compile(MULTIDISC_PAT_FMT % marker, re.I) + # We're using replace on %s due to lack of .format() on bytestrings + p = MULTIDISC_PAT_FMT.replace(b'%s', marker) + marker_pat = re.compile(p, re.I) match = marker_pat.match(os.path.basename(root)) # Is this directory the root of a nested multi-disc album? @@ -1492,13 +1649,16 @@ def albums_in_dir(path): start_collapsing = True subdir_pat = None for subdir in dirs: + subdir = util.bytestring_path(subdir) # The first directory dictates the pattern for # the remaining directories. if not subdir_pat: match = marker_pat.match(subdir) if match: + match_group = re.escape(match.group(1)) subdir_pat = re.compile( - br'^%s\d' % re.escape(match.group(1)), re.I + b''.join([b'^', match_group, br'\d']), + re.I ) else: start_collapsing = False @@ -1520,7 +1680,8 @@ def albums_in_dir(path): # Set the current pattern to match directories with the same # prefix as this one, followed by a digit. 
collapse_pat = re.compile( - br'^%s\d' % re.escape(match.group(1)), re.I + b''.join([b'^', re.escape(match.group(1)), br'\d']), + re.I ) break diff --git a/libs/beets/library.py b/libs/beets/library.py index 99397013..ba57407d 100644 --- a/libs/beets/library.py +++ b/libs/beets/library.py @@ -22,18 +22,27 @@ import sys import unicodedata import time import re -from unidecode import unidecode +import six from beets import logging -from beets.mediafile import MediaFile, MutagenError, UnreadableFileError +from beets.mediafile import MediaFile, UnreadableFileError from beets import plugins from beets import util -from beets.util import bytestring_path, syspath, normpath, samefile +from beets.util import bytestring_path, syspath, normpath, samefile, \ + MoveOperation from beets.util.functemplate import Template from beets import dbcore from beets.dbcore import types import beets +# To use the SQLite "blob" type, it doesn't suffice to provide a byte +# string; SQLite treats that as encoded text. Wrapping it in a `buffer` or a +# `memoryview`, depending on the Python version, tells it that we +# actually mean non-text data. +if six.PY2: + BLOB_TYPE = buffer # noqa: F821 +else: + BLOB_TYPE = memoryview log = logging.getLogger('beets') @@ -48,9 +57,6 @@ class PathQuery(dbcore.FieldQuery): and case-sensitive otherwise. """ - escape_re = re.compile(r'[\\_%]') - escape_char = b'\\' - def __init__(self, field, pattern, fast=True, case_sensitive=None): """Create a path query. `pattern` must be a path, either to a file or a directory. @@ -85,28 +91,31 @@ class PathQuery(dbcore.FieldQuery): colon = query_part.find(':') if colon != -1: query_part = query_part[:colon] - return (os.sep in query_part and - os.path.exists(syspath(normpath(query_part)))) + + # Test both `sep` and `altsep` (i.e., both slash and backslash on + # Windows). 
+ return ( + (os.sep in query_part or + (os.altsep and os.altsep in query_part)) and + os.path.exists(syspath(normpath(query_part))) + ) def match(self, item): path = item.path if self.case_sensitive else item.path.lower() return (path == self.file_path) or path.startswith(self.dir_path) def col_clause(self): - if self.case_sensitive: - file_blob = buffer(self.file_path) - dir_blob = buffer(self.dir_path) - return '({0} = ?) || (substr({0}, 1, ?) = ?)'.format(self.field), \ - (file_blob, len(dir_blob), dir_blob) + file_blob = BLOB_TYPE(self.file_path) + dir_blob = BLOB_TYPE(self.dir_path) - escape = lambda m: self.escape_char + m.group(0) - dir_pattern = self.escape_re.sub(escape, self.dir_path) - dir_blob = buffer(dir_pattern + b'%') - file_pattern = self.escape_re.sub(escape, self.file_path) - file_blob = buffer(file_pattern) - return '({0} LIKE ? ESCAPE ?) || ({0} LIKE ? ESCAPE ?)'.format( - self.field), (file_blob, self.escape_char, dir_blob, - self.escape_char) + if self.case_sensitive: + query_part = '({0} = ?) || (substr({0}, 1, ?) = ?)' + else: + query_part = '(BYTELOWER({0}) = BYTELOWER(?)) || \ + (substr(BYTELOWER({0}), 1, ?) = BYTELOWER(?))' + + return query_part.format(self.field), \ + (file_blob, len(dir_blob), dir_blob) # Library-specific field types. @@ -117,14 +126,15 @@ class DateType(types.Float): query = dbcore.query.DateQuery def format(self, value): - return time.strftime(beets.config['time_format'].get(unicode), + return time.strftime(beets.config['time_format'].as_str(), time.localtime(value or 0)) def parse(self, string): try: # Try a formatted date string. return time.mktime( - time.strptime(string, beets.config['time_format'].get(unicode)) + time.strptime(string, + beets.config['time_format'].as_str()) ) except ValueError: # Fall back to a plain timestamp number. @@ -135,10 +145,27 @@ class DateType(types.Float): class PathType(types.Type): + """A dbcore type for filesystem paths. 
These are represented as + `bytes` objects, in keeping with the Unix filesystem abstraction. + """ + sql = u'BLOB' query = PathQuery model_type = bytes + def __init__(self, nullable=False): + """Create a path type object. `nullable` controls whether the + type may be missing, i.e., None. + """ + self.nullable = nullable + + @property + def null(self): + if self.nullable: + return None + else: + return b'' + def format(self, value): return util.displayable_path(value) @@ -146,12 +173,11 @@ class PathType(types.Type): return normpath(bytestring_path(string)) def normalize(self, value): - if isinstance(value, unicode): + if isinstance(value, six.text_type): # Paths stored internally as encoded bytes. return bytestring_path(value) - elif isinstance(value, buffer): - # SQLite must store bytestings as buffers to avoid decoding. + elif isinstance(value, BLOB_TYPE): # We unwrap buffers to bytes. return bytes(value) @@ -163,7 +189,7 @@ class PathType(types.Type): def to_sql(self, value): if isinstance(value, bytes): - value = buffer(value) + value = BLOB_TYPE(value) return value @@ -180,6 +206,8 @@ class MusicalKey(types.String): r'bb': 'a#', } + null = None + def parse(self, key): key = key.lower() for flat, sharp in self.ENHARMONIC.items(): @@ -254,7 +282,7 @@ PF_KEY_DEFAULT = 'default' # Exceptions. - +@six.python_2_unicode_compatible class FileOperationError(Exception): """Indicates an error when interacting with a file on disk. Possibilities include an unsupported media type, a permissions @@ -268,35 +296,39 @@ class FileOperationError(Exception): self.path = path self.reason = reason - def __unicode__(self): + def text(self): """Get a string representing the error. Describes both the underlying reason and the file path in question. 
""" return u'{0}: {1}'.format( util.displayable_path(self.path), - unicode(self.reason) + six.text_type(self.reason) ) - def __str__(self): - return unicode(self).encode('utf8') + # define __str__ as text to avoid infinite loop on super() calls + # with @six.python_2_unicode_compatible + __str__ = text +@six.python_2_unicode_compatible class ReadError(FileOperationError): """An error while reading a file (i.e. in `Item.read`). """ - def __unicode__(self): - return u'error reading ' + super(ReadError, self).__unicode__() + def __str__(self): + return u'error reading ' + super(ReadError, self).text() +@six.python_2_unicode_compatible class WriteError(FileOperationError): """An error while writing a file (i.e. in `Item.write`). """ - def __unicode__(self): - return u'error writing ' + super(WriteError, self).__unicode__() + def __str__(self): + return u'error writing ' + super(WriteError, self).text() # Item and Album model classes. +@six.python_2_unicode_compatible class LibModel(dbcore.Model): """Shared concrete functionality for Items and Albums. 
""" @@ -310,8 +342,8 @@ class LibModel(dbcore.Model): funcs.update(plugins.template_funcs()) return funcs - def store(self): - super(LibModel, self).store() + def store(self, fields=None): + super(LibModel, self).store(fields) plugins.send('database_change', lib=self._db, model=self) def remove(self): @@ -324,20 +356,16 @@ class LibModel(dbcore.Model): def __format__(self, spec): if not spec: - spec = beets.config[self._format_config_key].get(unicode) - result = self.evaluate_template(spec) - if isinstance(spec, bytes): - # if spec is a byte string then we must return a one as well - return result.encode('utf8') - else: - return result + spec = beets.config[self._format_config_key].as_str() + assert isinstance(spec, six.text_type) + return self.evaluate_template(spec) def __str__(self): - return format(self).encode('utf8') - - def __unicode__(self): return format(self) + def __bytes__(self): + return self.__str__().encode('utf-8') + class FormattedItemMapping(dbcore.db.FormattedMapping): """Add lookup for album-level fields. 
@@ -407,7 +435,10 @@ class Item(LibModel): 'albumartist_sort': types.STRING, 'albumartist_credit': types.STRING, 'genre': types.STRING, + 'lyricist': types.STRING, 'composer': types.STRING, + 'composer_sort': types.STRING, + 'arranger': types.STRING, 'grouping': types.STRING, 'year': types.PaddedInt(4), 'month': types.PaddedInt(2), @@ -424,6 +455,7 @@ class Item(LibModel): 'mb_albumid': types.STRING, 'mb_artistid': types.STRING, 'mb_albumartistid': types.STRING, + 'mb_releasetrackid': types.STRING, 'albumtype': types.STRING, 'label': types.STRING, 'acoustid_fingerprint': types.STRING, @@ -443,6 +475,8 @@ class Item(LibModel): 'rg_track_peak': types.NULL_FLOAT, 'rg_album_gain': types.NULL_FLOAT, 'rg_album_peak': types.NULL_FLOAT, + 'r128_track_gain': types.PaddedInt(6), + 'r128_album_gain': types.PaddedInt(6), 'original_year': types.PaddedInt(4), 'original_month': types.PaddedInt(2), 'original_day': types.PaddedInt(2), @@ -510,15 +544,15 @@ class Item(LibModel): """ # Encode unicode paths and read buffers. if key == 'path': - if isinstance(value, unicode): + if isinstance(value, six.text_type): value = bytestring_path(value) - elif isinstance(value, buffer): + elif isinstance(value, BLOB_TYPE): value = bytes(value) - if key in MediaFile.fields(): - self.mtime = 0 # Reset mtime on dirty. + changed = super(Item, self)._setitem(key, value) - super(Item, self).__setitem__(key, value) + if changed and key in MediaFile.fields(): + self.mtime = 0 # Reset mtime on dirty. def update(self, values): """Set all key/value pairs in the mapping. 
If mtime is @@ -528,6 +562,11 @@ class Item(LibModel): if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] + def clear(self): + """Set all key/value pairs to None.""" + for key in self._media_fields: + setattr(self, key, None) + def get_album(self): """Get the Album object that this item belongs to, if any, or None if the item is a singleton or is not associated with a @@ -554,12 +593,12 @@ class Item(LibModel): read_path = normpath(read_path) try: mediafile = MediaFile(syspath(read_path)) - except (OSError, IOError, UnreadableFileError) as exc: + except UnreadableFileError as exc: raise ReadError(read_path, exc) for key in self._media_fields: value = getattr(mediafile, key) - if isinstance(value, (int, long)): + if isinstance(value, six.integer_types): if value.bit_length() > 63: value = 0 self[key] = value @@ -601,14 +640,14 @@ class Item(LibModel): try: mediafile = MediaFile(syspath(path), id3v23=beets.config['id3v23'].get(bool)) - except (OSError, IOError, UnreadableFileError) as exc: - raise ReadError(self.path, exc) + except UnreadableFileError as exc: + raise ReadError(path, exc) # Write the tags to the file. mediafile.update(item_tags) try: mediafile.save() - except (OSError, IOError, MutagenError) as exc: + except UnreadableFileError as exc: raise WriteError(self.path, exc) # The file has a new mtime. @@ -653,27 +692,34 @@ class Item(LibModel): # Files themselves. - def move_file(self, dest, copy=False, link=False): - """Moves or copies the item's file, updating the path value if - the move succeeds. If a file exists at ``dest``, then it is - slightly modified to be unique. + def move_file(self, dest, operation=MoveOperation.MOVE): + """Move, copy, link or hardlink the item's depending on `operation`, + updating the path value if the move succeeds. + + If a file exists at `dest`, then it is slightly modified to be unique. + + `operation` should be an instance of `util.MoveOperation`. 
""" if not util.samefile(self.path, dest): dest = util.unique_path(dest) - if copy: - util.copy(self.path, dest) - plugins.send("item_copied", item=self, source=self.path, - destination=dest) - elif link: - util.link(self.path, dest) - plugins.send("item_linked", item=self, source=self.path, - destination=dest) - else: + if operation == MoveOperation.MOVE: plugins.send("before_item_moved", item=self, source=self.path, destination=dest) util.move(self.path, dest) plugins.send("item_moved", item=self, source=self.path, destination=dest) + elif operation == MoveOperation.COPY: + util.copy(self.path, dest) + plugins.send("item_copied", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.LINK: + util.link(self.path, dest) + plugins.send("item_linked", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.HARDLINK: + util.hardlink(self.path, dest) + plugins.send("item_hardlinked", item=self, source=self.path, + destination=dest) # Either copying or moving succeeded, so update the stored path. self.path = dest @@ -720,26 +766,27 @@ class Item(LibModel): self._db._memotable = {} - def move(self, copy=False, link=False, basedir=None, with_album=True): + def move(self, operation=MoveOperation.MOVE, basedir=None, + with_album=True, store=True): """Move the item to its designated location within the library directory (provided by destination()). Subdirectories are created as needed. If the operation succeeds, the item's path field is updated to reflect the new location. - If `copy` is true, moving the file is copied rather than moved. - Similarly, `link` creates a symlink instead. + Instead of moving the item it can also be copied, linked or hardlinked + depending on `operation` which should be an instance of + `util.MoveOperation`. - basedir overrides the library base directory for the - destination. + `basedir` overrides the library base directory for the destination. 
- If the item is in an album, the album is given an opportunity to - move its art. (This can be disabled by passing - with_album=False.) + If the item is in an album and `with_album` is `True`, the album is + given an opportunity to move its art. - The item is stored to the database if it is in the database, so - any dirty fields prior to the move() call will be written as a - side effect. You probably want to call save() to commit the DB - transaction. + By default, the item is stored to the database if it is in the + database, so any dirty fields prior to the move() call will be written + as a side effect. + If `store` is `False` however, the item won't be stored and you'll + have to manually store it after invoking this method. """ self._check_db() dest = self.destination(basedir=basedir) @@ -749,18 +796,20 @@ class Item(LibModel): # Perform the move and store the change. old_path = self.path - self.move_file(dest, copy, link) - self.store() + self.move_file(dest, operation) + if store: + self.store() # If this item is in an album, move its art. if with_album: album = self.get_album() if album: - album.move_art(copy) - album.store() + album.move_art(operation) + if store: + album.store() # Prune vacated directory. - if not copy: + if operation == MoveOperation.MOVE: util.prune_dirs(os.path.dirname(old_path), self._db.directory) # Templating. 
@@ -811,7 +860,10 @@ class Item(LibModel): subpath = unicodedata.normalize('NFC', subpath) if beets.config['asciify_paths']: - subpath = unidecode(subpath) + subpath = util.asciify_path( + subpath, + beets.config['path_sep_replace'].as_str() + ) maxlen = beets.config['max_filename_length'].get(int) if not maxlen: @@ -833,7 +885,7 @@ class Item(LibModel): ) if fragment: - return subpath + return util.as_string(subpath) else: return normpath(os.path.join(basedir, subpath)) @@ -848,7 +900,7 @@ class Album(LibModel): _always_dirty = True _fields = { 'id': types.PRIMARY_ID, - 'artpath': PathType(), + 'artpath': PathType(True), 'added': DateType(), 'albumartist': types.STRING, @@ -875,6 +927,7 @@ class Album(LibModel): 'albumdisambig': types.STRING, 'rg_album_gain': types.NULL_FLOAT, 'rg_album_peak': types.NULL_FLOAT, + 'r128_album_gain': types.PaddedInt(6), 'original_year': types.PaddedInt(4), 'original_month': types.PaddedInt(2), 'original_day': types.PaddedInt(2), @@ -918,6 +971,7 @@ class Album(LibModel): 'albumdisambig', 'rg_album_gain', 'rg_album_peak', + 'r128_album_gain', 'original_year', 'original_month', 'original_day', @@ -962,9 +1016,12 @@ class Album(LibModel): for item in self.items(): item.remove(delete, False) - def move_art(self, copy=False, link=False): - """Move or copy any existing album art so that it remains in the - same directory as the items. + def move_art(self, operation=MoveOperation.MOVE): + """Move, copy, link or hardlink (depending on `operation`) any + existing album art so that it remains in the same directory as + the items. + + `operation` should be an instance of `util.MoveOperation`. 
""" old_art = self.artpath if not old_art: @@ -978,39 +1035,47 @@ class Album(LibModel): log.debug(u'moving album art {0} to {1}', util.displayable_path(old_art), util.displayable_path(new_art)) - if copy: - util.copy(old_art, new_art) - elif link: - util.link(old_art, new_art) - else: + if operation == MoveOperation.MOVE: util.move(old_art, new_art) + util.prune_dirs(os.path.dirname(old_art), self._db.directory) + elif operation == MoveOperation.COPY: + util.copy(old_art, new_art) + elif operation == MoveOperation.LINK: + util.link(old_art, new_art) + elif operation == MoveOperation.HARDLINK: + util.hardlink(old_art, new_art) self.artpath = new_art - # Prune old path when moving. - if not copy: - util.prune_dirs(os.path.dirname(old_art), - self._db.directory) + def move(self, operation=MoveOperation.MOVE, basedir=None, store=True): + """Move, copy, link or hardlink (depending on `operation`) + all items to their destination. Any album art moves along with them. - def move(self, copy=False, link=False, basedir=None): - """Moves (or copies) all items to their destination. Any album - art moves along with them. basedir overrides the library base - directory for the destination. The album is stored to the - database, persisting any modifications to its metadata. + `basedir` overrides the library base directory for the destination. + + `operation` should be an instance of `util.MoveOperation`. + + By default, the album is stored to the database, persisting any + modifications to its metadata. If `store` is `False` however, + the album is not stored automatically, and you'll have to manually + store it after invoking this method. """ basedir = basedir or self._db.directory # Ensure new metadata is available to items for destination # computation. - self.store() + if store: + self.store() # Move items. 
items = list(self.items()) for item in items: - item.move(copy, link, basedir=basedir, with_album=False) + item.move(operation, basedir=basedir, with_album=False, + store=store) # Move art. - self.move_art(copy, link) - self.store() + self.move_art(operation) + if store: + self.store() def item_dir(self): """Returns the directory containing the album's first item, @@ -1054,10 +1119,14 @@ class Album(LibModel): image = bytestring_path(image) item_dir = item_dir or self.item_dir() - filename_tmpl = Template(beets.config['art_filename'].get(unicode)) + filename_tmpl = Template( + beets.config['art_filename'].as_str()) subpath = self.evaluate_template(filename_tmpl, True) if beets.config['asciify_paths']: - subpath = unidecode(subpath) + subpath = util.asciify_path( + subpath, + beets.config['path_sep_replace'].as_str() + ) subpath = util.sanitize_path(subpath, replacements=self._db.replacements) subpath = bytestring_path(subpath) @@ -1098,9 +1167,11 @@ class Album(LibModel): plugins.send('art_set', album=self) - def store(self): + def store(self, fields=None): """Update the database with the album information. The album's tracks are also updated. + :param fields: The fields to be stored. If not specified, all fields + will be. """ # Get modified track fields. track_updates = {} @@ -1109,7 +1180,7 @@ class Album(LibModel): track_updates[key] = self[key] with self._db.transaction(): - super(Album, self).store() + super(Album, self).store(fields) if track_updates: for item in self.items(): for key, value in track_updates.items(): @@ -1172,7 +1243,8 @@ def parse_query_string(s, model_cls): The string is split into components using shell-like syntax. 
""" - assert isinstance(s, unicode), u"Query is not unicode: {0!r}".format(s) + message = u"Query is not unicode: {0!r}".format(s) + assert isinstance(s, six.text_type), message try: parts = util.shlex_split(s) except ValueError as exc: @@ -1180,6 +1252,19 @@ def parse_query_string(s, model_cls): return parse_query_parts(parts, model_cls) +def _sqlite_bytelower(bytestring): + """ A custom ``bytelower`` sqlite function so we can compare + bytestrings in a semi case insensitive fashion. This is to work + around sqlite builds are that compiled with + ``-DSQLITE_LIKE_DOESNT_MATCH_BLOBS``. See + ``https://github.com/beetbox/beets/issues/2172`` for details. + """ + if not six.PY2: + return bytestring.lower() + + return buffer(bytes(bytestring).lower()) # noqa: F821 + + # The Library: interface to the database. class Library(dbcore.Database): @@ -1192,9 +1277,8 @@ class Library(dbcore.Database): path_formats=((PF_KEY_DEFAULT, '$artist/$album/$track $title'),), replacements=None): - if path != ':memory:': - self.path = bytestring_path(normpath(path)) - super(Library, self).__init__(path) + timeout = beets.config['timeout'].as_number() + super(Library, self).__init__(path, timeout=timeout) self.directory = bytestring_path(normpath(directory)) self.path_formats = path_formats @@ -1202,6 +1286,11 @@ class Library(dbcore.Database): self._memotable = {} # Used for template substitution performance. + def _create_connection(self): + conn = super(Library, self)._create_connection() + conn.create_function('bytelower', 1, _sqlite_bytelower) + return conn + # Adding objects to the database. def add(self, obj): @@ -1248,11 +1337,11 @@ class Library(dbcore.Database): # Parse the query, if necessary. 
try: parsed_sort = None - if isinstance(query, basestring): + if isinstance(query, six.string_types): query, parsed_sort = parse_query_string(query, model_cls) elif isinstance(query, (list, tuple)): query, parsed_sort = parse_query_parts(query, model_cls) - except dbcore.query.InvalidQueryArgumentTypeError as exc: + except dbcore.query.InvalidQueryArgumentValueError as exc: raise dbcore.InvalidQueryError(query, exc) # Any non-null sort specified by the parsed query overrides the @@ -1392,22 +1481,24 @@ class DefaultTemplateFunctions(object): def tmpl_asciify(s): """Translate non-ASCII characters to their ASCII equivalents. """ - return unidecode(s) + return util.asciify_path(s, beets.config['path_sep_replace'].as_str()) @staticmethod def tmpl_time(s, fmt): """Format a time value using `strftime`. """ - cur_fmt = beets.config['time_format'].get(unicode) + cur_fmt = beets.config['time_format'].as_str() return time.strftime(fmt, time.strptime(s, cur_fmt)) - def tmpl_aunique(self, keys=None, disam=None): + def tmpl_aunique(self, keys=None, disam=None, bracket=None): """Generate a string that is guaranteed to be unique among all albums in the library who share the same set of keys. A fields from "disam" is used in the string if one is sufficient to disambiguate the albums. Otherwise, a fallback opaque value is used. Both "keys" and "disam" should be given as - whitespace-separated lists of field names. + whitespace-separated lists of field names, while "bracket" is a + pair of characters to be used as brackets surrounding the + disambiguator or empty to have no brackets. """ # Fast paths: no album, no item or library, or memoized value. 
if not self.item or not self.lib: @@ -1421,9 +1512,19 @@ class DefaultTemplateFunctions(object): keys = keys or 'albumartist album' disam = disam or 'albumtype year label catalognum albumdisambig' + if bracket is None: + bracket = '[]' keys = keys.split() disam = disam.split() + # Assign a left and right bracket or leave blank if argument is empty. + if len(bracket) == 2: + bracket_l = bracket[0] + bracket_r = bracket[1] + else: + bracket_l = u'' + bracket_r = u'' + album = self.lib.get_album(self.item) if not album: # Do nothing for singletons. @@ -1456,13 +1557,19 @@ class DefaultTemplateFunctions(object): else: # No disambiguator distinguished all fields. - res = u' {0}'.format(album.id) + res = u' {1}{0}{2}'.format(album.id, bracket_l, bracket_r) self.lib._memotable[memokey] = res return res # Flatten disambiguation value into a string. disam_value = album.formatted(True).get(disambiguator) - res = u' [{0}]'.format(disam_value) + + # Return empty string if disambiguator is empty. + if disam_value: + res = u' {1}{0}{2}'.format(disam_value, bracket_l, bracket_r) + else: + res = u'' + self.lib._memotable[memokey] = res return res diff --git a/libs/beets/logging.py b/libs/beets/logging.py index a94da1c6..d5ec7b73 100644 --- a/libs/beets/logging.py +++ b/libs/beets/logging.py @@ -27,6 +27,7 @@ from copy import copy from logging import * # noqa import subprocess import threading +import six def logsafe(val): @@ -42,7 +43,7 @@ def logsafe(val): example. """ # Already Unicode. - if isinstance(val, unicode): + if isinstance(val, six.text_type): return val # Bytestring: needs decoding. @@ -51,16 +52,16 @@ def logsafe(val): # (a) only do this for paths, if they can be given a distinct # type, and (b) warn the developer if they do this for other # bytestrings. - return val.decode('utf8', 'replace') + return val.decode('utf-8', 'replace') # A "problem" object: needs a workaround. 
elif isinstance(val, subprocess.CalledProcessError): try: - return unicode(val) + return six.text_type(val) except UnicodeDecodeError: # An object with a broken __unicode__ formatter. Use __str__ # instead. - return str(val).decode('utf8', 'replace') + return str(val).decode('utf-8', 'replace') # Other objects are used as-is so field access, etc., still works in # the format string. diff --git a/libs/beets/mediafile.py b/libs/beets/mediafile.py index a359a5b4..32a32fe1 100644 --- a/libs/beets/mediafile.py +++ b/libs/beets/mediafile.py @@ -36,32 +36,29 @@ data from the tags. In turn ``MediaField`` uses a number of from __future__ import division, absolute_import, print_function import mutagen -import mutagen.mp3 import mutagen.id3 -import mutagen.oggopus -import mutagen.oggvorbis import mutagen.mp4 import mutagen.flac -import mutagen.monkeysaudio import mutagen.asf -import mutagen.aiff + +import codecs import datetime import re import base64 +import binascii import math import struct import imghdr import os import traceback import enum - -from beets import logging -from beets.util import displayable_path, syspath +import logging +import six __all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] -log = logging.getLogger('beets') +log = logging.getLogger(__name__) # Human-readable type names. TYPES = { @@ -76,16 +73,19 @@ TYPES = { 'mpc': 'Musepack', 'asf': 'Windows Media', 'aiff': 'AIFF', + 'dsf': 'DSD Stream File', } +PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'} + # Exceptions. class UnreadableFileError(Exception): """Mutagen is not able to extract information from the file. """ - def __init__(self, path): - Exception.__init__(self, displayable_path(path)) + def __init__(self, path, msg): + Exception.__init__(self, msg if msg else repr(path)) class FileTypeError(UnreadableFileError): @@ -95,11 +95,10 @@ class FileTypeError(UnreadableFileError): mutagen type is not supported by `Mediafile`. 
""" def __init__(self, path, mutagen_type=None): - path = displayable_path(path) if mutagen_type is None: - msg = path + msg = u'{0!r}: not in a recognized format'.format(path) else: - msg = u'{0}: of mutagen type {1}'.format(path, mutagen_type) + msg = u'{0}: of mutagen type {1}'.format(repr(path), mutagen_type) Exception.__init__(self, msg) @@ -107,10 +106,37 @@ class MutagenError(UnreadableFileError): """Raised when Mutagen fails unexpectedly---probably due to a bug. """ def __init__(self, path, mutagen_exc): - msg = u'{0}: {1}'.format(displayable_path(path), mutagen_exc) + msg = u'{0}: {1}'.format(repr(path), mutagen_exc) Exception.__init__(self, msg) +# Interacting with Mutagen. + +def mutagen_call(action, path, func, *args, **kwargs): + """Call a Mutagen function with appropriate error handling. + + `action` is a string describing what the function is trying to do, + and `path` is the relevant filename. The rest of the arguments + describe the callable to invoke. + + We require at least Mutagen 1.33, where `IOError` is *never* used, + neither for internal parsing errors *nor* for ordinary IO error + conditions such as a bad filename. Mutagen-specific parsing errors and IO + errors are reraised as `UnreadableFileError`. Other exceptions + raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. + """ + try: + return func(*args, **kwargs) + except mutagen.MutagenError as exc: + log.debug(u'%s failed: %s', action, six.text_type(exc)) + raise UnreadableFileError(path, six.text_type(exc)) + except Exception as exc: + # Isolate bugs in Mutagen. + log.debug(u'%s', traceback.format_exc()) + log.error(u'uncaught Mutagen exception in %s: %s', action, exc) + raise MutagenError(path, exc) + + # Utility. def _safe_cast(out_type, val): @@ -128,14 +154,13 @@ def _safe_cast(out_type, val): return int(val) else: # Process any other type as a string. 
- if not isinstance(val, basestring): - val = unicode(val) + if isinstance(val, bytes): + val = val.decode('utf-8', 'ignore') + elif not isinstance(val, six.string_types): + val = six.text_type(val) # Get a number from the front of the string. - val = re.match(r'[0-9]*', val.strip()).group(0) - if not val: - return 0 - else: - return int(val) + match = re.match(r'[\+-]?[0-9]+', val.strip()) + return int(match.group(0)) if match else 0 elif out_type == bool: try: @@ -144,20 +169,22 @@ def _safe_cast(out_type, val): except ValueError: return False - elif out_type == unicode: + elif out_type == six.text_type: if isinstance(val, bytes): - return val.decode('utf8', 'ignore') - elif isinstance(val, unicode): + return val.decode('utf-8', 'ignore') + elif isinstance(val, six.text_type): return val else: - return unicode(val) + return six.text_type(val) elif out_type == float: if isinstance(val, int) or isinstance(val, float): return float(val) else: - if not isinstance(val, basestring): - val = unicode(val) + if isinstance(val, bytes): + val = val.decode('utf-8', 'ignore') + else: + val = six.text_type(val) match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', val.strip()) if match: @@ -183,12 +210,12 @@ def _unpack_asf_image(data): """ type, size = struct.unpack_from(' 0 else None + if text_delimiter_index > 0: + comment = frame.value[0:text_delimiter_index] + comment = comment.decode('utf-8', 'replace') + else: + comment = None image_data = frame.value[text_delimiter_index + 1:] images.append(Image(data=image_data, type=cover_type, desc=comment)) @@ -1036,7 +1141,7 @@ class APEv2ImageStorageStyle(ListStorageStyle): for image in values: image_type = image.type or ImageType.other comment = image.desc or '' - image_data = comment.encode('utf8') + b'\x00' + image.data + image_data = comment.encode('utf-8') + b'\x00' + image.data cover_tag = self.TAG_NAMES[image_type] mutagen_file[cover_tag] = image_data @@ -1070,7 +1175,7 @@ class MediaField(object): getting this 
property. """ - self.out_type = kwargs.get('out_type', unicode) + self.out_type = kwargs.get('out_type', six.text_type) self._styles = styles def styles(self, mutagen_file): @@ -1109,7 +1214,7 @@ class MediaField(object): return 0.0 elif self.out_type == bool: return False - elif self.out_type == unicode: + elif self.out_type == six.text_type: return u'' @@ -1190,9 +1295,9 @@ class DateField(MediaField): """ # Get the underlying data and split on hyphens and slashes. datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, basestring): - datestring = re.sub(r'[Tt ].*$', '', unicode(datestring)) - items = re.split('[-/]', unicode(datestring)) + if isinstance(datestring, six.string_types): + datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) + items = re.split('[-/]', six.text_type(datestring)) else: items = [] @@ -1211,7 +1316,7 @@ class DateField(MediaField): for item in items: try: items_.append(int(item)) - except: + except (TypeError, ValueError): items_.append(None) return items_ @@ -1229,7 +1334,7 @@ class DateField(MediaField): date.append(u'{0:02d}'.format(int(month))) if month and day: date.append(u'{0:02d}'.format(int(day))) - date = map(unicode, date) + date = map(six.text_type, date) super(DateField, self).__set__(mediafile, u'-'.join(date)) if hasattr(self, '_year_field'): @@ -1337,40 +1442,9 @@ class MediaFile(object): By default, MP3 files are saved with ID3v2.4 tags. You can use the older ID3v2.3 standard by specifying the `id3v23` option. 
""" - path = syspath(path) self.path = path - unreadable_exc = ( - mutagen.mp3.error, - mutagen.id3.error, - mutagen.flac.error, - mutagen.monkeysaudio.MonkeysAudioHeaderError, - mutagen.mp4.error, - mutagen.oggopus.error, - mutagen.oggvorbis.error, - mutagen.ogg.error, - mutagen.asf.error, - mutagen.apev2.error, - mutagen.aiff.error, - ) - try: - self.mgfile = mutagen.File(path) - except unreadable_exc as exc: - log.debug(u'header parsing failed: {0}', unicode(exc)) - raise UnreadableFileError(path) - except IOError as exc: - if type(exc) == IOError: - # This is a base IOError, not a subclass from Mutagen or - # anywhere else. - raise - else: - log.debug(u'{}', traceback.format_exc()) - raise MutagenError(path, exc) - except Exception as exc: - # Isolate bugs in Mutagen. - log.debug(u'{}', traceback.format_exc()) - log.error(u'uncaught Mutagen exception in open: {0}', exc) - raise MutagenError(path, exc) + self.mgfile = mutagen_call('open', path, mutagen.File, path) if self.mgfile is None: # Mutagen couldn't guess the type @@ -1378,20 +1452,10 @@ class MediaFile(object): elif (type(self.mgfile).__name__ == 'M4A' or type(self.mgfile).__name__ == 'MP4'): info = self.mgfile.info - if hasattr(info, 'codec'): - if info.codec and info.codec.startswith('alac'): - self.type = 'alac' - else: - self.type = 'aac' + if info.codec and info.codec.startswith('alac'): + self.type = 'alac' else: - # This hack differentiates AAC and ALAC on versions of - # Mutagen < 1.26. Once Mutagen > 1.26 is out and - # required by beets, we can remove this. 
- if hasattr(self.mgfile.info, 'bitrate') and \ - self.mgfile.info.bitrate > 0: - self.type = 'aac' - else: - self.type = 'alac' + self.type = 'aac' elif (type(self.mgfile).__name__ == 'ID3' or type(self.mgfile).__name__ == 'MP3'): self.type = 'mp3' @@ -1411,6 +1475,8 @@ class MediaFile(object): self.type = 'asf' elif type(self.mgfile).__name__ == 'AIFF': self.type = 'aiff' + elif type(self.mgfile).__name__ == 'DSF': + self.type = 'dsf' else: raise FileTypeError(path, type(self.mgfile).__name__) @@ -1422,7 +1488,8 @@ class MediaFile(object): self.id3v23 = id3v23 and self.type == 'mp3' def save(self): - """Write the object's tags back to the file. + """Write the object's tags back to the file. May + throw `UnreadableFileError`. """ # Possibly save the tags to ID3v2.3. kwargs = {} @@ -1434,27 +1501,13 @@ class MediaFile(object): id3.update_to_v23() kwargs['v2_version'] = 3 - # Isolate bugs in Mutagen. - try: - self.mgfile.save(**kwargs) - except (IOError, OSError): - # Propagate these through: they don't represent Mutagen bugs. - raise - except Exception as exc: - log.debug(u'{}', traceback.format_exc()) - log.error(u'uncaught Mutagen exception in save: {0}', exc) - raise MutagenError(self.path, exc) + mutagen_call('save', self.path, self.mgfile.save, **kwargs) def delete(self): - """Remove the current metadata tag from the file. + """Remove the current metadata tag from the file. May + throw `UnreadableFileError`. """ - try: - self.mgfile.delete() - except NotImplementedError: - # For Mutagen types that don't support deletion (notably, - # ASF), just delete each tag individually. - for tag in self.mgfile.keys(): - del self.mgfile[tag] + mutagen_call('delete', self.path, self.mgfile.delete) # Convenient access to the set of available fields. 
@@ -1466,7 +1519,12 @@ class MediaFile(object): """ for property, descriptor in cls.__dict__.items(): if isinstance(descriptor, MediaField): - yield property.decode('utf8') + if isinstance(property, bytes): + # On Python 2, class field names are bytes. This method + # produces text strings. + yield property.decode('utf8', 'ignore') + else: + yield property @classmethod def _field_sort_name(cls, name): @@ -1543,45 +1601,64 @@ class MediaFile(object): title = MediaField( MP3StorageStyle('TIT2'), - MP4StorageStyle(b"\xa9nam"), + MP4StorageStyle('\xa9nam'), StorageStyle('TITLE'), ASFStorageStyle('Title'), ) artist = MediaField( MP3StorageStyle('TPE1'), - MP4StorageStyle(b"\xa9ART"), + MP4StorageStyle('\xa9ART'), StorageStyle('ARTIST'), ASFStorageStyle('Author'), ) album = MediaField( MP3StorageStyle('TALB'), - MP4StorageStyle(b"\xa9alb"), + MP4StorageStyle('\xa9alb'), StorageStyle('ALBUM'), ASFStorageStyle('WM/AlbumTitle'), ) genres = ListMediaField( MP3ListStorageStyle('TCON'), - MP4ListStorageStyle(b"\xa9gen"), + MP4ListStorageStyle('\xa9gen'), ListStorageStyle('GENRE'), ASFStorageStyle('WM/Genre'), ) genre = genres.single_field() + lyricist = MediaField( + MP3StorageStyle('TEXT'), + MP4StorageStyle('----:com.apple.iTunes:LYRICIST'), + StorageStyle('LYRICIST'), + ASFStorageStyle('WM/Writer'), + ) composer = MediaField( MP3StorageStyle('TCOM'), - MP4StorageStyle(b"\xa9wrt"), + MP4StorageStyle('\xa9wrt'), StorageStyle('COMPOSER'), ASFStorageStyle('WM/Composer'), ) + composer_sort = MediaField( + MP3StorageStyle('TSOC'), + MP4StorageStyle('soco'), + StorageStyle('COMPOSERSORT'), + ASFStorageStyle('WM/Composersortorder'), + ) + arranger = MediaField( + MP3PeopleStorageStyle('TIPL', involvement='arranger'), + MP4StorageStyle('----:com.apple.iTunes:Arranger'), + StorageStyle('ARRANGER'), + ASFStorageStyle('beets/Arranger'), + ) + grouping = MediaField( MP3StorageStyle('TIT1'), - MP4StorageStyle(b"\xa9grp"), + MP4StorageStyle('\xa9grp'), StorageStyle('GROUPING'), 
ASFStorageStyle('WM/ContentGroupDescription'), ) track = MediaField( MP3SlashPackStorageStyle('TRCK', pack_pos=0), - MP4TupleStorageStyle(b'trkn', index=0), + MP4TupleStorageStyle('trkn', index=0), StorageStyle('TRACK'), StorageStyle('TRACKNUMBER'), ASFStorageStyle('WM/TrackNumber'), @@ -1589,7 +1666,7 @@ class MediaFile(object): ) tracktotal = MediaField( MP3SlashPackStorageStyle('TRCK', pack_pos=1), - MP4TupleStorageStyle(b'trkn', index=1), + MP4TupleStorageStyle('trkn', index=1), StorageStyle('TRACKTOTAL'), StorageStyle('TRACKC'), StorageStyle('TOTALTRACKS'), @@ -1598,7 +1675,7 @@ class MediaFile(object): ) disc = MediaField( MP3SlashPackStorageStyle('TPOS', pack_pos=0), - MP4TupleStorageStyle(b'disk', index=0), + MP4TupleStorageStyle('disk', index=0), StorageStyle('DISC'), StorageStyle('DISCNUMBER'), ASFStorageStyle('WM/PartOfSet'), @@ -1606,7 +1683,7 @@ class MediaFile(object): ) disctotal = MediaField( MP3SlashPackStorageStyle('TPOS', pack_pos=1), - MP4TupleStorageStyle(b'disk', index=1), + MP4TupleStorageStyle('disk', index=1), StorageStyle('DISCTOTAL'), StorageStyle('DISCC'), StorageStyle('TOTALDISCS'), @@ -1615,13 +1692,13 @@ class MediaFile(object): ) lyrics = MediaField( MP3DescStorageStyle(key='USLT'), - MP4StorageStyle(b"\xa9lyr"), + MP4StorageStyle('\xa9lyr'), StorageStyle('LYRICS'), ASFStorageStyle('WM/Lyrics'), ) comments = MediaField( MP3DescStorageStyle(key='COMM'), - MP4StorageStyle(b"\xa9cmt"), + MP4StorageStyle('\xa9cmt'), StorageStyle('DESCRIPTION'), StorageStyle('COMMENT'), ASFStorageStyle('WM/Comments'), @@ -1629,111 +1706,111 @@ class MediaFile(object): ) bpm = MediaField( MP3StorageStyle('TBPM'), - MP4StorageStyle(b'tmpo', as_type=int), + MP4StorageStyle('tmpo', as_type=int), StorageStyle('BPM'), ASFStorageStyle('WM/BeatsPerMinute'), out_type=int, ) comp = MediaField( MP3StorageStyle('TCMP'), - MP4BoolStorageStyle(b'cpil'), + MP4BoolStorageStyle('cpil'), StorageStyle('COMPILATION'), ASFStorageStyle('WM/IsCompilation', as_type=bool), 
out_type=bool, ) albumartist = MediaField( MP3StorageStyle('TPE2'), - MP4StorageStyle(b'aART'), + MP4StorageStyle('aART'), StorageStyle('ALBUM ARTIST'), StorageStyle('ALBUMARTIST'), ASFStorageStyle('WM/AlbumArtist'), ) albumtype = MediaField( MP3DescStorageStyle(u'MusicBrainz Album Type'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Album Type'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type'), StorageStyle('MUSICBRAINZ_ALBUMTYPE'), ASFStorageStyle('MusicBrainz/Album Type'), ) label = MediaField( MP3StorageStyle('TPUB'), - MP4StorageStyle(b'----:com.apple.iTunes:Label'), - MP4StorageStyle(b'----:com.apple.iTunes:publisher'), + MP4StorageStyle('----:com.apple.iTunes:Label'), + MP4StorageStyle('----:com.apple.iTunes:publisher'), StorageStyle('LABEL'), StorageStyle('PUBLISHER'), # Traktor ASFStorageStyle('WM/Publisher'), ) artist_sort = MediaField( MP3StorageStyle('TSOP'), - MP4StorageStyle(b"soar"), + MP4StorageStyle('soar'), StorageStyle('ARTISTSORT'), ASFStorageStyle('WM/ArtistSortOrder'), ) albumartist_sort = MediaField( MP3DescStorageStyle(u'ALBUMARTISTSORT'), - MP4StorageStyle(b"soaa"), + MP4StorageStyle('soaa'), StorageStyle('ALBUMARTISTSORT'), ASFStorageStyle('WM/AlbumArtistSortOrder'), ) asin = MediaField( MP3DescStorageStyle(u'ASIN'), - MP4StorageStyle(b"----:com.apple.iTunes:ASIN"), + MP4StorageStyle('----:com.apple.iTunes:ASIN'), StorageStyle('ASIN'), ASFStorageStyle('MusicBrainz/ASIN'), ) catalognum = MediaField( MP3DescStorageStyle(u'CATALOGNUMBER'), - MP4StorageStyle(b"----:com.apple.iTunes:CATALOGNUMBER"), + MP4StorageStyle('----:com.apple.iTunes:CATALOGNUMBER'), StorageStyle('CATALOGNUMBER'), ASFStorageStyle('WM/CatalogNo'), ) disctitle = MediaField( MP3StorageStyle('TSST'), - MP4StorageStyle(b"----:com.apple.iTunes:DISCSUBTITLE"), + MP4StorageStyle('----:com.apple.iTunes:DISCSUBTITLE'), StorageStyle('DISCSUBTITLE'), ASFStorageStyle('WM/SetSubTitle'), ) encoder = MediaField( MP3StorageStyle('TENC'), - 
MP4StorageStyle(b"\xa9too"), + MP4StorageStyle('\xa9too'), StorageStyle('ENCODEDBY'), StorageStyle('ENCODER'), ASFStorageStyle('WM/EncodedBy'), ) script = MediaField( MP3DescStorageStyle(u'Script'), - MP4StorageStyle(b"----:com.apple.iTunes:SCRIPT"), + MP4StorageStyle('----:com.apple.iTunes:SCRIPT'), StorageStyle('SCRIPT'), ASFStorageStyle('WM/Script'), ) language = MediaField( MP3StorageStyle('TLAN'), - MP4StorageStyle(b"----:com.apple.iTunes:LANGUAGE"), + MP4StorageStyle('----:com.apple.iTunes:LANGUAGE'), StorageStyle('LANGUAGE'), ASFStorageStyle('WM/Language'), ) country = MediaField( - MP3DescStorageStyle('MusicBrainz Album Release Country'), - MP4StorageStyle(b"----:com.apple.iTunes:MusicBrainz " - b"Album Release Country"), + MP3DescStorageStyle(u'MusicBrainz Album Release Country'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz ' + 'Album Release Country'), StorageStyle('RELEASECOUNTRY'), ASFStorageStyle('MusicBrainz/Album Release Country'), ) albumstatus = MediaField( MP3DescStorageStyle(u'MusicBrainz Album Status'), - MP4StorageStyle(b"----:com.apple.iTunes:MusicBrainz Album Status"), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Status'), StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), ASFStorageStyle('MusicBrainz/Album Status'), ) media = MediaField( MP3StorageStyle('TMED'), - MP4StorageStyle(b"----:com.apple.iTunes:MEDIA"), + MP4StorageStyle('----:com.apple.iTunes:MEDIA'), StorageStyle('MEDIA'), ASFStorageStyle('WM/Media'), ) albumdisambig = MediaField( # This tag mapping was invented for beets (not used by Picard, etc). MP3DescStorageStyle(u'MusicBrainz Album Comment'), - MP4StorageStyle(b"----:com.apple.iTunes:MusicBrainz Album Comment"), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Comment'), StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), ASFStorageStyle('MusicBrainz/Album Comment'), ) @@ -1741,7 +1818,7 @@ class MediaFile(object): # Release date. 
date = DateField( MP3StorageStyle('TDRC'), - MP4StorageStyle(b"\xa9day"), + MP4StorageStyle('\xa9day'), StorageStyle('DATE'), ASFStorageStyle('WM/Year'), year=(StorageStyle('YEAR'),)) @@ -1753,7 +1830,7 @@ class MediaFile(object): # *Original* release date. original_date = DateField( MP3StorageStyle('TDOR'), - MP4StorageStyle(b'----:com.apple.iTunes:ORIGINAL YEAR'), + MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), StorageStyle('ORIGINALDATE'), ASFStorageStyle('WM/OriginalReleaseYear')) @@ -1764,13 +1841,13 @@ class MediaFile(object): # Nonstandard metadata. artist_credit = MediaField( MP3DescStorageStyle(u'Artist Credit'), - MP4StorageStyle(b"----:com.apple.iTunes:Artist Credit"), + MP4StorageStyle('----:com.apple.iTunes:Artist Credit'), StorageStyle('ARTIST_CREDIT'), ASFStorageStyle('beets/Artist Credit'), ) albumartist_credit = MediaField( MP3DescStorageStyle(u'Album Artist Credit'), - MP4StorageStyle(b"----:com.apple.iTunes:Album Artist Credit"), + MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), StorageStyle('ALBUMARTIST_CREDIT'), ASFStorageStyle('beets/Album Artist Credit'), ) @@ -1784,31 +1861,37 @@ class MediaFile(object): # MusicBrainz IDs. 
mb_trackid = MediaField( MP3UFIDStorageStyle(owner='http://musicbrainz.org'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Track Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id'), StorageStyle('MUSICBRAINZ_TRACKID'), ASFStorageStyle('MusicBrainz/Track Id'), ) + mb_releasetrackid = MediaField( + MP3DescStorageStyle(u'MusicBrainz Release Track Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Track Id'), + StorageStyle('MUSICBRAINZ_RELEASETRACKID'), + ASFStorageStyle('MusicBrainz/Release Track Id'), + ) mb_albumid = MediaField( MP3DescStorageStyle(u'MusicBrainz Album Id'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Album Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id'), StorageStyle('MUSICBRAINZ_ALBUMID'), ASFStorageStyle('MusicBrainz/Album Id'), ) mb_artistid = MediaField( MP3DescStorageStyle(u'MusicBrainz Artist Id'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Artist Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id'), StorageStyle('MUSICBRAINZ_ARTISTID'), ASFStorageStyle('MusicBrainz/Artist Id'), ) mb_albumartistid = MediaField( MP3DescStorageStyle(u'MusicBrainz Album Artist Id'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Album Artist Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Artist Id'), StorageStyle('MUSICBRAINZ_ALBUMARTISTID'), ASFStorageStyle('MusicBrainz/Album Artist Id'), ) mb_releasegroupid = MediaField( MP3DescStorageStyle(u'MusicBrainz Release Group Id'), - MP4StorageStyle(b'----:com.apple.iTunes:MusicBrainz Release Group Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Group Id'), StorageStyle('MUSICBRAINZ_RELEASEGROUPID'), ASFStorageStyle('MusicBrainz/Release Group Id'), ) @@ -1816,13 +1899,13 @@ class MediaFile(object): # Acoustid fields. 
acoustid_fingerprint = MediaField( MP3DescStorageStyle(u'Acoustid Fingerprint'), - MP4StorageStyle(b'----:com.apple.iTunes:Acoustid Fingerprint'), + MP4StorageStyle('----:com.apple.iTunes:Acoustid Fingerprint'), StorageStyle('ACOUSTID_FINGERPRINT'), ASFStorageStyle('Acoustid/Fingerprint'), ) acoustid_id = MediaField( MP3DescStorageStyle(u'Acoustid Id'), - MP4StorageStyle(b'----:com.apple.iTunes:Acoustid Id'), + MP4StorageStyle('----:com.apple.iTunes:Acoustid Id'), StorageStyle('ACOUSTID_ID'), ASFStorageStyle('Acoustid/Id'), ) @@ -1843,11 +1926,11 @@ class MediaFile(object): id3_lang='eng' ), MP4StorageStyle( - b'----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=b' dB' + '----:com.apple.iTunes:replaygain_track_gain', + float_places=2, suffix=' dB' ), MP4SoundCheckStorageStyle( - b'----:com.apple.iTunes:iTunNORM', + '----:com.apple.iTunes:iTunNORM', index=0 ), StorageStyle( @@ -1869,9 +1952,9 @@ class MediaFile(object): u'replaygain_album_gain', float_places=2, suffix=u' dB' ), - MP4SoundCheckStorageStyle( - b'----:com.apple.iTunes:iTunNORM', - index=1 + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_album_gain', + float_places=2, suffix=' dB' ), StorageStyle( u'REPLAYGAIN_ALBUM_GAIN', @@ -1898,11 +1981,11 @@ class MediaFile(object): id3_lang='eng' ), MP4StorageStyle( - b'----:com.apple.iTunes:replaygain_track_peak', + '----:com.apple.iTunes:replaygain_track_peak', float_places=6 ), MP4SoundCheckStorageStyle( - b'----:com.apple.iTunes:iTunNORM', + '----:com.apple.iTunes:iTunNORM', index=1 ), StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), @@ -1919,7 +2002,7 @@ class MediaFile(object): float_places=6 ), MP4StorageStyle( - b'----:com.apple.iTunes:replaygain_album_peak', + '----:com.apple.iTunes:replaygain_album_peak', float_places=6 ), StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), @@ -1927,9 +2010,41 @@ class MediaFile(object): out_type=float, ) + # EBU R128 fields. 
+ r128_track_gain = MediaField( + MP3DescStorageStyle( + u'R128_TRACK_GAIN' + ), + MP4StorageStyle( + '----:com.apple.iTunes:R128_TRACK_GAIN' + ), + StorageStyle( + u'R128_TRACK_GAIN' + ), + ASFStorageStyle( + u'R128_TRACK_GAIN' + ), + out_type=int, + ) + r128_album_gain = MediaField( + MP3DescStorageStyle( + u'R128_ALBUM_GAIN' + ), + MP4StorageStyle( + '----:com.apple.iTunes:R128_ALBUM_GAIN' + ), + StorageStyle( + u'R128_ALBUM_GAIN' + ), + ASFStorageStyle( + u'R128_ALBUM_GAIN' + ), + out_type=int, + ) + initial_key = MediaField( MP3StorageStyle('TKEY'), - MP4StorageStyle(b'----:com.apple.iTunes:initialkey'), + MP4StorageStyle('----:com.apple.iTunes:initialkey'), StorageStyle('INITIALKEY'), ASFStorageStyle('INITIALKEY'), ) @@ -1962,13 +2077,6 @@ class MediaFile(object): @property def channels(self): """The number of channels in the audio (an int).""" - if isinstance(self.mgfile.info, mutagen.mp3.MPEGInfo): - return { - mutagen.mp3.STEREO: 2, - mutagen.mp3.JOINTSTEREO: 2, - mutagen.mp3.DUALCHANNEL: 2, - mutagen.mp3.MONO: 1, - }[self.mgfile.info.mode] if hasattr(self.mgfile.info, 'channels'): return self.mgfile.info.channels return 0 diff --git a/libs/beets/plugins.py b/libs/beets/plugins.py index 239f64fb..1bd2cacd 100644 --- a/libs/beets/plugins.py +++ b/libs/beets/plugins.py @@ -27,6 +27,7 @@ from functools import wraps import beets from beets import logging from beets import mediafile +import six PLUGIN_NAMESPACE = 'beetsplug' @@ -54,10 +55,10 @@ class PluginLogFilter(logging.Filter): def filter(self, record): if hasattr(record.msg, 'msg') and isinstance(record.msg.msg, - basestring): + six.string_types): # A _LogMessage from our hacked-up Logging replacement. 
record.msg.msg = self.prefix + record.msg.msg - elif isinstance(record.msg, basestring): + elif isinstance(record.msg, six.string_types): record.msg = self.prefix + record.msg return True @@ -80,6 +81,7 @@ class BeetsPlugin(object): self.template_fields = {} if not self.album_template_fields: self.album_template_fields = {} + self.early_import_stages = [] self.import_stages = [] self._log = log.getChild(self.name) @@ -93,6 +95,22 @@ class BeetsPlugin(object): """ return () + def _set_stage_log_level(self, stages): + """Adjust all the stages in `stages` to WARNING logging level. + """ + return [self._set_log_level_and_params(logging.WARNING, stage) + for stage in stages] + + def get_early_import_stages(self): + """Return a list of functions that should be called as importer + pipelines stages early in the pipeline. + + The callables are wrapped versions of the functions in + `self.early_import_stages`. Wrapping provides some bookkeeping for the + plugin: specifically, the logging level is adjusted to WARNING. + """ + return self._set_stage_log_level(self.early_import_stages) + def get_import_stages(self): """Return a list of functions that should be called as importer pipelines stages. @@ -101,8 +119,7 @@ class BeetsPlugin(object): `self.import_stages`. Wrapping provides some bookkeeping for the plugin: specifically, the logging level is adjusted to WARNING. 
""" - return [self._set_log_level_and_params(logging.WARNING, import_stage) - for import_stage in self.import_stages] + return self._set_stage_log_level(self.import_stages) def _set_log_level_and_params(self, base_log_level, func): """Wrap `func` to temporarily set this plugin's logger level to @@ -254,7 +271,7 @@ def load_plugins(names=()): except ImportError as exc: # Again, this is hacky: if exc.args[0].endswith(' ' + name): - log.warn(u'** plugin {0} not found', name) + log.warning(u'** plugin {0} not found', name) else: raise else: @@ -263,8 +280,8 @@ def load_plugins(names=()): and obj != BeetsPlugin and obj not in _classes: _classes.add(obj) - except: - log.warn( + except Exception: + log.warning( u'** error loading plugin {}:\n{}', name, traceback.format_exc(), @@ -350,41 +367,35 @@ def album_distance(items, album_info, mapping): def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. """ - out = [] for plugin in find_plugins(): - out.extend(plugin.candidates(items, artist, album, va_likely)) - return out + for candidate in plugin.candidates(items, artist, album, va_likely): + yield candidate def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ - out = [] for plugin in find_plugins(): - out.extend(plugin.item_candidates(item, artist, title)) - return out + for item_candidate in plugin.item_candidates(item, artist, title): + yield item_candidate def album_for_id(album_id): """Get AlbumInfo objects for a given ID string. """ - out = [] for plugin in find_plugins(): - res = plugin.album_for_id(album_id) - if res: - out.append(res) - return out + album = plugin.album_for_id(album_id) + if album: + yield album def track_for_id(track_id): """Get TrackInfo objects for a given ID string. 
""" - out = [] for plugin in find_plugins(): - res = plugin.track_for_id(track_id) - if res: - out.append(res) - return out + track = plugin.track_for_id(track_id) + if track: + yield track def template_funcs(): @@ -398,6 +409,14 @@ def template_funcs(): return funcs +def early_import_stages(): + """Get a list of early import stage functions defined by plugins.""" + stages = [] + for plugin in find_plugins(): + stages += plugin.get_early_import_stages() + return stages + + def import_stages(): """Get a list of import stage functions defined by plugins.""" stages = [] @@ -483,7 +502,64 @@ def sanitize_choices(choices, choices_all): others = [x for x in choices_all if x not in choices] res = [] for s in choices: - if s in list(choices_all) + ['*']: - if not (s in seen or seen.add(s)): - res.extend(list(others) if s == '*' else [s]) + if s not in seen: + if s in list(choices_all): + res.append(s) + elif s == '*': + res.extend(others) + seen.add(s) return res + + +def sanitize_pairs(pairs, pairs_all): + """Clean up a single-element mapping configuration attribute as returned + by `confit`'s `Pairs` template: keep only two-element tuples present in + pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*') + wildcards while keeping the original order. Note that ('*', '*') and + ('*', 'whatever') have the same effect. + + For example, + + >>> sanitize_pairs( + ... [('foo', 'baz bar'), ('key', '*'), ('*', '*')], + ... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'), + ... ('key', 'value')] + ... 
) + [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')] + """ + pairs_all = list(pairs_all) + seen = set() + others = [x for x in pairs_all if x not in pairs] + res = [] + for k, values in pairs: + for v in values.split(): + x = (k, v) + if x in pairs_all: + if x not in seen: + seen.add(x) + res.append(x) + elif k == '*': + new = [o for o in others if o not in seen] + seen.update(new) + res.extend(new) + elif v == '*': + new = [o for o in others if o not in seen and o[0] == k] + seen.update(new) + res.extend(new) + return res + + +def notify_info_yielded(event): + """Makes a generator send the event 'event' every time it yields. + This decorator is supposed to decorate a generator, but any function + returning an iterable should work. + Each yielded value is passed to plugins using the 'info' parameter of + 'send'. + """ + def decorator(generator): + def decorated(*args, **kwargs): + for v in generator(*args, **kwargs): + send(event, info=v) + yield v + return decorated + return decorator diff --git a/libs/beets/ui/__init__.py b/libs/beets/ui/__init__.py index 797df44d..af2b79a1 100644 --- a/libs/beets/ui/__init__.py +++ b/libs/beets/ui/__init__.py @@ -20,7 +20,6 @@ CLI commands are implemented in the ui.commands module. from __future__ import division, absolute_import, print_function -import locale import optparse import textwrap import sys @@ -31,6 +30,7 @@ import re import struct import traceback import os.path +from six.moves import input from beets import logging from beets import library @@ -38,9 +38,11 @@ from beets import plugins from beets import util from beets.util.functemplate import Template from beets import config -from beets.util import confit +from beets.util import confit, as_string from beets.autotag import mb from beets.dbcore import query as db_query +from beets.dbcore import db +import six # On Windows platforms, use colorama to support "ANSI" terminal colors. 
if sys.platform == 'win32': @@ -73,51 +75,47 @@ class UserError(Exception): # Encoding utilities. -def _in_encoding(default=u'utf-8'): +def _in_encoding(): """Get the encoding to use for *inputting* strings from the console. - - :param default: the fallback sys.stdin encoding """ - - return config['terminal_encoding'].get() or getattr(sys.stdin, 'encoding', - default) + return _stream_encoding(sys.stdin) def _out_encoding(): """Get the encoding to use for *outputting* strings to the console. """ + return _stream_encoding(sys.stdout) + + +def _stream_encoding(stream, default='utf-8'): + """A helper for `_in_encoding` and `_out_encoding`: get the stream's + preferred encoding, using a configured override or a default + fallback if neither is specified. + """ # Configured override? encoding = config['terminal_encoding'].get() if encoding: return encoding - # For testing: When sys.stdout is a StringIO under the test harness, - # it doesn't have an `encoding` attribute. Just use UTF-8. - if not hasattr(sys.stdout, 'encoding'): - return 'utf8' + # For testing: When sys.stdout or sys.stdin is a StringIO under the + # test harness, it doesn't have an `encoding` attribute. Just use + # UTF-8. + if not hasattr(stream, 'encoding'): + return default # Python's guessed output stream encoding, or UTF-8 as a fallback # (e.g., when piped to a file). - return sys.stdout.encoding or 'utf8' - - -def _arg_encoding(): - """Get the encoding for command-line arguments (and other OS - locale-sensitive strings). - """ - try: - return locale.getdefaultlocale()[1] or 'utf8' - except ValueError: - # Invalid locale environment variable setting. To avoid - # failing entirely for no good reason, assume UTF-8. - return 'utf8' + return stream.encoding or default def decargs(arglist): """Given a list of command-line argument bytestrings, attempts to - decode them to Unicode strings. + decode them to Unicode strings when running under Python 2. 
""" - return [s.decode(_arg_encoding()) for s in arglist] + if six.PY2: + return [s.decode(util.arg_encoding()) for s in arglist] + else: + return arglist def print_(*strings, **kwargs): @@ -125,27 +123,37 @@ def print_(*strings, **kwargs): is not in the terminal's encoding's character set, just silently replaces it. - If the arguments are strings then they're expected to share the same - type: either bytes or unicode. + The arguments must be Unicode strings: `unicode` on Python 2; `str` on + Python 3. The `end` keyword argument behaves similarly to the built-in `print` - (it defaults to a newline). The value should have the same string - type as the arguments. + (it defaults to a newline). """ - end = kwargs.get('end') + if not strings: + strings = [u''] + assert isinstance(strings[0], six.text_type) - if not strings or isinstance(strings[0], unicode): - txt = u' '.join(strings) - txt += u'\n' if end is None else end + txt = u' '.join(strings) + txt += kwargs.get('end', u'\n') + + # Encode the string and write it to stdout. + if six.PY2: + # On Python 2, sys.stdout expects bytes. + out = txt.encode(_out_encoding(), 'replace') + sys.stdout.write(out) else: - txt = b' '.join(strings) - txt += b'\n' if end is None else end - - # Always send bytes to the stdout stream. - if isinstance(txt, unicode): - txt = txt.encode(_out_encoding(), 'replace') - - sys.stdout.write(txt) + # On Python 3, sys.stdout expects text strings and uses the + # exception-throwing encoding error policy. To avoid throwing + # errors and use our configurable encoding override, we use the + # underlying bytes buffer instead. + if hasattr(sys.stdout, 'buffer'): + out = txt.encode(_out_encoding(), 'replace') + sys.stdout.buffer.write(out) + sys.stdout.buffer.flush() + else: + # In our test harnesses (e.g., DummyOut), sys.stdout.buffer + # does not exist. We instead just record the text string. + sys.stdout.write(txt) # Configuration wrappers. 
@@ -188,23 +196,26 @@ def should_move(move_opt=None): # Input prompts. def input_(prompt=None): - """Like `raw_input`, but decodes the result to a Unicode string. + """Like `input`, but decodes the result to a Unicode string. Raises a UserError if stdin is not available. The prompt is sent to stdout rather than stderr. A space is printed between the prompt and the input cursor. """ # raw_input incorrectly sends prompts to stderr, not stdout, so we - # use print() explicitly to display prompts. + # use print_() explicitly to display prompts. # http://bugs.python.org/issue1927 if prompt: - print_(prompt, end=' ') + print_(prompt, end=u' ') try: - resp = raw_input() + resp = input() except EOFError: raise UserError(u'stdin stream ended while input required') - return resp.decode(_in_encoding(), 'ignore') + if six.PY2: + return resp.decode(_in_encoding(), 'ignore') + else: + return resp def input_options(options, require=False, prompt=None, fallback_prompt=None, @@ -256,7 +267,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Mark the option's shortcut letter for display. if not require and ( (default is None and not numrange and first) or - (isinstance(default, basestring) and + (isinstance(default, six.string_types) and found_letter.lower() == default.lower())): # The first option is the default; mark it. 
show_letter = '[%s]' % found_letter.upper() @@ -292,11 +303,11 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, prompt_part_lengths = [] if numrange: if isinstance(default, int): - default_name = unicode(default) + default_name = six.text_type(default) default_name = colorize('action_default', default_name) tmpl = '# selection (default %s)' prompt_parts.append(tmpl % default_name) - prompt_part_lengths.append(len(tmpl % unicode(default))) + prompt_part_lengths.append(len(tmpl % six.text_type(default))) else: prompt_parts.append('# selection') prompt_part_lengths.append(len(prompt_parts[-1])) @@ -516,7 +527,8 @@ def colorize(color_name, text): if config['ui']['color']: global COLORS if not COLORS: - COLORS = dict((name, config['ui']['colors'][name].get(unicode)) + COLORS = dict((name, + config['ui']['colors'][name].as_str()) for name in COLOR_NAMES) # In case a 3rd party plugin is still passing the actual color ('red') # instead of the abstract color name ('text_error') @@ -536,10 +548,11 @@ def _colordiff(a, b, highlight='text_highlight', highlighted intelligently to show differences; other values are stringified and highlighted in their entirety. """ - if not isinstance(a, basestring) or not isinstance(b, basestring): + if not isinstance(a, six.string_types) \ + or not isinstance(b, six.string_types): # Non-strings: use ordinary equality. - a = unicode(a) - b = unicode(b) + a = six.text_type(a) + b = six.text_type(b) if a == b: return a, b else: @@ -587,7 +600,7 @@ def colordiff(a, b, highlight='text_highlight'): if config['ui']['color']: return _colordiff(a, b, highlight) else: - return unicode(a), unicode(b) + return six.text_type(a), six.text_type(b) def get_path_formats(subview=None): @@ -598,7 +611,7 @@ def get_path_formats(subview=None): subview = subview or config['paths'] for query, view in subview.items(): query = PF_KEY_QUERIES.get(query, query) # Expand common queries. 
- path_formats.append((query, Template(view.get(unicode)))) + path_formats.append((query, Template(view.as_str()))) return path_formats @@ -666,7 +679,7 @@ def _field_diff(field, old, new): # For strings, highlight changes. For others, colorize the whole # thing. - if isinstance(oldval, basestring): + if isinstance(oldval, six.string_types): oldstr, newstr = colordiff(oldval, newstr) else: oldstr = colorize('text_error', oldstr) @@ -757,6 +770,34 @@ def show_path_changes(path_changes): log.info(u'{0} {1} -> {2}', source, ' ' * pad, dest) +# Helper functions for option parsing. + +def _store_dict(option, opt_str, value, parser): + """Custom action callback to parse options which have ``key=value`` + pairs as values. All such pairs passed for this option are + aggregated into a dictionary. + """ + dest = option.dest + option_values = getattr(parser.values, dest, None) + + if option_values is None: + # This is the first supplied ``key=value`` pair of option. + # Initialize empty dictionary and get a reference to it. + setattr(parser.values, dest, dict()) + option_values = getattr(parser.values, dest) + + try: + key, value = map(lambda s: util.text_string(s), value.split('=')) + if not (key and value): + raise ValueError + except ValueError: + raise UserError( + "supplied argument `{0}' is not of the form `key=value'" + .format(value)) + + option_values[key] = value + + class CommonOptionsParser(optparse.OptionParser, object): """Offers a simple way to add common formatting options. @@ -799,7 +840,14 @@ class CommonOptionsParser(optparse.OptionParser, object): if store_true: setattr(parser.values, option.dest, True) - value = fmt or value and unicode(value) or '' + # Use the explicitly specified format, or the string from the option. 
+ if fmt: + value = fmt + elif value: + value, = decargs([value]) + else: + value = u'' + parser.values.format = value if target: config[target._format_config_key].set(value) @@ -830,7 +878,7 @@ class CommonOptionsParser(optparse.OptionParser, object): """ path = optparse.Option(*flags, nargs=0, action='callback', callback=self._set_format, - callback_kwargs={'fmt': '$path', + callback_kwargs={'fmt': u'$path', 'store_true': True}, help=u'print paths for matched items or albums') self.add_option(path) @@ -852,7 +900,7 @@ class CommonOptionsParser(optparse.OptionParser, object): """ kwargs = {} if target: - if isinstance(target, basestring): + if isinstance(target, six.string_types): target = {'item': library.Item, 'album': library.Album}[target] kwargs['target'] = target @@ -911,7 +959,7 @@ class Subcommand(object): def root_parser(self, root_parser): self._root_parser = root_parser self.parser.prog = '{0} {1}'.format( - root_parser.get_prog_name().decode('utf8'), self.name) + as_string(root_parser.get_prog_name()), self.name) class SubcommandsOptionParser(CommonOptionsParser): @@ -1044,54 +1092,24 @@ class SubcommandsOptionParser(CommonOptionsParser): optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) -def vararg_callback(option, opt_str, value, parser): - """Callback for an option with variable arguments. - Manually collect arguments right of a callback-action - option (ie. with action="callback"), and add the resulting - list to the destination var. 
- - Usage: - parser.add_option("-c", "--callback", dest="vararg_attr", - action="callback", callback=vararg_callback) - - Details: - http://docs.python.org/2/library/optparse.html#callback-example-6-variable - -arguments - """ - value = [value] - - def floatable(str): - try: - float(str) - return True - except ValueError: - return False - - for arg in parser.rargs: - # stop on --foo like options - if arg[:2] == "--" and len(arg) > 2: - break - # stop on -a, but not on -3 or -3.0 - if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): - break - value.append(arg) - - del parser.rargs[:len(value) - 1] - setattr(parser.values, option.dest, value) - - # The main entry point and bootstrapping. def _load_plugins(config): """Load the plugins specified in the configuration. """ - paths = config['pluginpath'].get(confit.StrSeq(split=False)) - paths = map(util.normpath, paths) + paths = config['pluginpath'].as_str_seq(split=False) + paths = [util.normpath(p) for p in paths] log.debug(u'plugin paths: {0}', util.displayable_path(paths)) + # On Python 3, the search paths need to be unicode. + paths = [util.py3_path(p) for p in paths] + + # Extend the `beetsplug` package to include the plugin paths. import beetsplug beetsplug.__path__ = paths + beetsplug.__path__ - # For backwards compatibility. + + # For backwards compatibility, also support plugin paths that + # *contain* a `beetsplug` package. sys.path += paths plugins.load_plugins(config['plugins'].as_str_seq()) @@ -1133,9 +1151,11 @@ def _configure(options): # special handling lets specified plugins get loaded before we # finish parsing the command line. if getattr(options, 'config', None) is not None: - config_path = options.config + overlay_path = options.config del options.config - config.set_file(config_path) + config.set_file(overlay_path) + else: + overlay_path = None config.set_args(options) # Configure the logger. 
@@ -1144,27 +1164,9 @@ def _configure(options): else: log.set_global_level(logging.INFO) - # Ensure compatibility with old (top-level) color configuration. - # Deprecation msg to motivate user to switch to config['ui']['color]. - if config['color'].exists(): - log.warning(u'Warning: top-level configuration of `color` ' - u'is deprecated. Configure color use under `ui`. ' - u'See documentation for more info.') - config['ui']['color'].set(config['color'].get(bool)) - - # Compatibility from list_format_{item,album} to format_{item,album} - for elem in ('item', 'album'): - old_key = 'list_format_{0}'.format(elem) - if config[old_key].exists(): - new_key = 'format_{0}'.format(elem) - log.warning( - u'Warning: configuration uses "{0}" which is deprecated' - u' in favor of "{1}" now that it affects all commands. ' - u'See changelog & documentation.', - old_key, - new_key, - ) - config[new_key].set(config[old_key]) + if overlay_path: + log.debug(u'overlaying configuration: {0}', + util.displayable_path(overlay_path)) config_path = config.user_config_path() if os.path.isfile(config_path): @@ -1182,7 +1184,7 @@ def _configure(options): def _open_library(config): """Create a new library instance from the configuration. """ - dbpath = config['library'].as_filename() + dbpath = util.bytestring_path(config['library'].as_filename()) try: lib = library.Library( dbpath, @@ -1233,6 +1235,7 @@ def _raw_main(args, lib=None): from beets.ui.commands import config_edit return config_edit() + test_lib = bool(lib) subcommands, plugins, lib = _setup(options, lib) parser.add_subcommand(*subcommands) @@ -1240,6 +1243,9 @@ def _raw_main(args, lib=None): subcommand.func(lib, suboptions, subargs) plugins.send('cli_exit', lib=lib) + if not test_lib: + # Clean up the library unless it came from the test harness. + lib._close() def main(args=None): @@ -1270,9 +1276,16 @@ def main(args=None): except IOError as exc: if exc.errno == errno.EPIPE: # "Broken pipe". End silently. 
- pass + sys.stderr.close() else: raise except KeyboardInterrupt: # Silently ignore ^C except in verbose mode. log.debug(u'{}', traceback.format_exc()) + except db.DBAccessError as exc: + log.error( + u'database access error: {0}\n' + u'the library file might have a permissions problem', + exc + ) + sys.exit(1) diff --git a/libs/beets/ui/commands.py b/libs/beets/ui/commands.py index 867a4737..46ae1d93 100644 --- a/libs/beets/ui/commands.py +++ b/libs/beets/ui/commands.py @@ -21,6 +21,7 @@ from __future__ import division, absolute_import, print_function import os import re +from platform import python_version from collections import namedtuple, Counter from itertools import chain @@ -33,14 +34,17 @@ from beets.autotag import hooks from beets import plugins from beets import importer from beets import util -from beets.util import syspath, normpath, ancestry, displayable_path +from beets.util import syspath, normpath, ancestry, displayable_path, \ + MoveOperation from beets import library from beets import config from beets import logging from beets.util.confit import _package_path +import six +from . import _store_dict VARIOUS_ARTISTS = u'Various Artists' -PromptChoice = namedtuple('ExtraChoice', ['short', 'long', 'callback']) +PromptChoice = namedtuple('PromptChoice', ['short', 'long', 'callback']) # Global logger. log = logging.getLogger('beets') @@ -82,16 +86,16 @@ def _do_query(lib, query, album, also_items=True): def _print_keys(query): """Given a SQLite query result, print the `key` field of each - returned row, with identation of 2 spaces. + returned row, with indentation of 2 spaces. 
""" for row in query: - print_(' ' * 2 + row['key']) + print_(u' ' * 2 + row['key']) def fields_func(lib, opts, args): def _print_rows(names): names.sort() - print_(" " + "\n ".join(names)) + print_(u' ' + u'\n '.join(names)) print_(u"Item fields:") _print_rows(library.Item.all_keys()) @@ -156,14 +160,14 @@ def disambig_string(info): if isinstance(info, hooks.AlbumInfo): if info.media: - if info.mediums > 1: + if info.mediums and info.mediums > 1: disambig.append(u'{0}x{1}'.format( info.mediums, info.media )) else: disambig.append(info.media) if info.year: - disambig.append(unicode(info.year)) + disambig.append(six.text_type(info.year)) if info.country: disambig.append(info.country) if info.label: @@ -233,12 +237,12 @@ def show_change(cur_artist, cur_album, match): medium = track_info.disc mediums = track_info.disctotal if config['per_disc_numbering']: - if mediums > 1: + if mediums and mediums > 1: return u'{0}-{1}'.format(medium, medium_index) else: - return unicode(medium_index) + return six.text_type(medium_index or index) else: - return unicode(index) + return six.text_type(index) # Identify the album in question. if cur_artist != match.info.artist or \ @@ -279,7 +283,7 @@ def show_change(cur_artist, cur_album, match): print_(' '.join(info)) # Tracks. - pairs = match.mapping.items() + pairs = list(match.mapping.items()) pairs.sort(key=lambda item_and_track_info: item_and_track_info[1].index) # Build up LHS and RHS for track difference display. The `lines` list @@ -493,7 +497,7 @@ def _summary_judgment(rec): def choose_candidate(candidates, singleton, rec, cur_artist=None, cur_album=None, item=None, itemcount=None, - extra_choices=[]): + choices=[]): """Given a sorted list of candidates, ask the user for a selection of which candidate to use. Applies to both full albums and singletons (tracks). 
Candidates are either AlbumMatch or TrackMatch @@ -501,16 +505,12 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, `cur_album`, and `itemcount` must be provided. For singletons, `item` must be provided. - `extra_choices` is a list of `PromptChoice`s, containg the choices - appended by the plugins after receiving the `before_choose_candidate` - event. If not empty, the choices are appended to the prompt presented - to the user. + `choices` is a list of `PromptChoice`s to be used in each prompt. Returns one of the following: - * the result of the choice, which may be SKIP, ASIS, TRACKS, or MANUAL + * the result of the choice, which may be SKIP or ASIS * a candidate (an AlbumMatch/TrackMatch object) - * the short letter of a `PromptChoice` (if the user selected one of - the `extra_choices`). + * a chosen `PromptChoice` from `choices` """ # Sanity check. if singleton: @@ -519,41 +519,22 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, assert cur_artist is not None assert cur_album is not None - # Build helper variables for extra choices. - extra_opts = tuple(c.long for c in extra_choices) - extra_actions = tuple(c.short for c in extra_choices) + # Build helper variables for the prompt choices. + choice_opts = tuple(c.long for c in choices) + choice_actions = {c.short: c for c in choices} # Zero candidates. if not candidates: if singleton: print_(u"No matching recordings found.") - opts = (u'Use as-is', u'Skip', u'Enter search', u'enter Id', - u'aBort') else: print_(u"No matching release found for {0} tracks." 
.format(itemcount)) print_(u'For help, see: ' u'http://beets.readthedocs.org/en/latest/faq.html#nomatch') - opts = (u'Use as-is', u'as Tracks', u'Group albums', u'Skip', - u'Enter search', u'enter Id', u'aBort') - sel = ui.input_options(opts + extra_opts) - if sel == u'u': - return importer.action.ASIS - elif sel == u't': - assert not singleton - return importer.action.TRACKS - elif sel == u'e': - return importer.action.MANUAL - elif sel == u's': - return importer.action.SKIP - elif sel == u'b': - raise importer.ImportAbort() - elif sel == u'i': - return importer.action.MANUAL_ID - elif sel == u'g': - return importer.action.ALBUMS - elif sel in extra_actions: - return sel + sel = ui.input_options(choice_opts) + if sel in choice_actions: + return choice_actions[sel] else: assert False @@ -601,33 +582,12 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, print_(u' '.join(line)) # Ask the user for a choice. - if singleton: - opts = (u'Skip', u'Use as-is', u'Enter search', u'enter Id', - u'aBort') - else: - opts = (u'Skip', u'Use as-is', u'as Tracks', u'Group albums', - u'Enter search', u'enter Id', u'aBort') - sel = ui.input_options(opts + extra_opts, + sel = ui.input_options(choice_opts, numrange=(1, len(candidates))) - if sel == u's': - return importer.action.SKIP - elif sel == u'u': - return importer.action.ASIS - elif sel == u'm': + if sel == u'm': pass - elif sel == u'e': - return importer.action.MANUAL - elif sel == u't': - assert not singleton - return importer.action.TRACKS - elif sel == u'b': - raise importer.ImportAbort() - elif sel == u'i': - return importer.action.MANUAL_ID - elif sel == u'g': - return importer.action.ALBUMS - elif sel in extra_actions: - return sel + elif sel in choice_actions: + return choice_actions[sel] else: # Numerical selection. match = candidates[sel - 1] if sel != 1: @@ -647,13 +607,6 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, return match # Ask for confirmation. 
- if singleton: - opts = (u'Apply', u'More candidates', u'Skip', u'Use as-is', - u'Enter search', u'enter Id', u'aBort') - else: - opts = (u'Apply', u'More candidates', u'Skip', u'Use as-is', - u'as Tracks', u'Group albums', u'Enter search', - u'enter Id', u'aBort') default = config['import']['default_action'].as_choice({ u'apply': u'a', u'skip': u's', @@ -662,43 +615,57 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, }) if default is None: require = True - sel = ui.input_options(opts + extra_opts, require=require, - default=default) + # Bell ring when user interaction is needed. + if config['import']['bell']: + ui.print_(u'\a', end=u'') + sel = ui.input_options((u'Apply', u'More candidates') + choice_opts, + require=require, default=default) if sel == u'a': return match - elif sel == u'g': - return importer.action.ALBUMS - elif sel == u's': - return importer.action.SKIP - elif sel == u'u': - return importer.action.ASIS - elif sel == u't': - assert not singleton - return importer.action.TRACKS - elif sel == u'e': - return importer.action.MANUAL - elif sel == u'b': - raise importer.ImportAbort() - elif sel == u'i': - return importer.action.MANUAL_ID - elif sel in extra_actions: - return sel + elif sel in choice_actions: + return choice_actions[sel] -def manual_search(singleton): - """Input either an artist and album (for full albums) or artist and +def manual_search(session, task): + """Get a new `Proposal` using manual search criteria. + + Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. 
""" - artist = input_(u'Artist:') - name = input_(u'Track:' if singleton else u'Album:') - return artist.strip(), name.strip() + artist = input_(u'Artist:').strip() + name = input_(u'Album:' if task.is_album else u'Track:').strip() + + if task.is_album: + _, _, prop = autotag.tag_album( + task.items, artist, name + ) + return prop + else: + return autotag.tag_item(task.item, artist, name) -def manual_id(singleton): - """Input an ID, either for an album ("release") or a track ("recording"). +def manual_id(session, task): + """Get a new `Proposal` using a manually-entered ID. + + Input an ID, either for an album ("release") or a track ("recording"). """ - prompt = u'Enter {0} ID:'.format(u'recording' if singleton else u'release') - return input_(prompt).strip() + prompt = u'Enter {0} ID:'.format(u'release' if task.is_album + else u'recording') + search_id = input_(prompt).strip() + + if task.is_album: + _, _, prop = autotag.tag_album( + task.items, search_ids=search_id.split() + ) + return prop + else: + return autotag.tag_item(task.item, search_ids=search_id.split()) + + +def abort_action(session, task): + """A prompt choice callback that aborts the importer. + """ + raise importer.ImportAbort() class TerminalImportSession(importer.ImportSession): @@ -724,42 +691,34 @@ class TerminalImportSession(importer.ImportSession): return action # Loop until we have a choice. - candidates, rec = task.candidates, task.rec while True: - # Gather extra choices from plugins. - extra_choices = self._get_plugin_choices(task) - extra_ops = {c.short: c.callback for c in extra_choices} - - # Ask for a choice from the user. + # Ask for a choice from the user. The result of + # `choose_candidate` may be an `importer.action`, an + # `AlbumMatch` object for a specific selection, or a + # `PromptChoice`. 
+ choices = self._get_choices(task) choice = choose_candidate( - candidates, False, rec, task.cur_artist, task.cur_album, - itemcount=len(task.items), extra_choices=extra_choices + task.candidates, False, task.rec, task.cur_artist, + task.cur_album, itemcount=len(task.items), choices=choices ) - # Choose which tags to use. - if choice in (importer.action.SKIP, importer.action.ASIS, - importer.action.TRACKS, importer.action.ALBUMS): + # Basic choices that require no more action here. + if choice in (importer.action.SKIP, importer.action.ASIS): # Pass selection to main control flow. return choice - elif choice is importer.action.MANUAL: - # Try again with manual search terms. - search_artist, search_album = manual_search(False) - _, _, candidates, rec = autotag.tag_album( - task.items, search_artist, search_album - ) - elif choice is importer.action.MANUAL_ID: - # Try a manually-entered ID. - search_id = manual_id(False) - if search_id: - _, _, candidates, rec = autotag.tag_album( - task.items, search_ids=search_id.split() - ) - elif choice in extra_ops.keys(): - # Allow extra ops to automatically set the post-choice. - post_choice = extra_ops[choice](self, task) + + # Plugin-provided choices. We invoke the associated callback + # function. + elif choice in choices: + post_choice = choice.callback(self, task) if isinstance(post_choice, importer.action): - # MANUAL and MANUAL_ID have no effect, even if returned. return post_choice + elif isinstance(post_choice, autotag.Proposal): + # Use the new candidates and continue around the loop. + task.candidates = post_choice.candidates + task.rec = post_choice.recommendation + + # Otherwise, we have a specific match selection. else: # We have a candidate! Finish tagging. Here, choice is an # AlbumMatch object. @@ -771,7 +730,7 @@ class TerminalImportSession(importer.ImportSession): either an action constant or a TrackMatch object. 
""" print_() - print_(task.item.path) + print_(displayable_path(task.item.path)) candidates, rec = task.candidates, task.rec # Take immediate action if appropriate. @@ -784,34 +743,22 @@ class TerminalImportSession(importer.ImportSession): return action while True: - extra_choices = self._get_plugin_choices(task) - extra_ops = {c.short: c.callback for c in extra_choices} - # Ask for a choice. + choices = self._get_choices(task) choice = choose_candidate(candidates, True, rec, item=task.item, - extra_choices=extra_choices) + choices=choices) if choice in (importer.action.SKIP, importer.action.ASIS): return choice - elif choice == importer.action.TRACKS: - assert False # TRACKS is only legal for albums. - elif choice == importer.action.MANUAL: - # Continue in the loop with a new set of candidates. - search_artist, search_title = manual_search(True) - candidates, rec = autotag.tag_item(task.item, search_artist, - search_title) - elif choice == importer.action.MANUAL_ID: - # Ask for a track ID. - search_id = manual_id(True) - if search_id: - candidates, rec = autotag.tag_item( - task.item, search_ids=search_id.split()) - elif choice in extra_ops.keys(): - # Allow extra ops to automatically set the post-choice. - post_choice = extra_ops[choice](self, task) + + elif choice in choices: + post_choice = choice.callback(self, task) if isinstance(post_choice, importer.action): - # MANUAL and MANUAL_ID have no effect, even if returned. return post_choice + elif isinstance(post_choice, autotag.Proposal): + candidates = post_choice.candidates + rec = post_choice.recommendation + else: # Chose a candidate. assert isinstance(choice, autotag.TrackMatch) @@ -821,8 +768,8 @@ class TerminalImportSession(importer.ImportSession): """Decide what to do when a new album or item seems similar to one that's already in the library. 
""" - log.warn(u"This {0} is already in the library!", - (u"album" if task.is_album else u"item")) + log.warning(u"This {0} is already in the library!", + (u"album" if task.is_album else u"item")) if config['import']['quiet']: # In quiet mode, don't prompt -- just skip. @@ -843,7 +790,7 @@ class TerminalImportSession(importer.ImportSession): )) sel = ui.input_options( - (u'Skip new', u'Keep both', u'Remove old') + (u'Skip new', u'Keep both', u'Remove old', u'Merge all') ) if sel == u's': @@ -855,6 +802,8 @@ class TerminalImportSession(importer.ImportSession): elif sel == u'r': # Remove old. task.should_remove_duplicates = True + elif sel == u'm': + task.should_merge_duplicates = True else: assert False @@ -863,8 +812,10 @@ class TerminalImportSession(importer.ImportSession): u"was interrupted. Resume (Y/n)?" .format(displayable_path(path))) - def _get_plugin_choices(self, task): - """Get the extra choices appended to the plugins to the ui prompt. + def _get_choices(self, task): + """Get the list of prompt choices that should be presented to the + user. This consists of both built-in choices and ones provided by + plugins. The `before_choose_candidate` event is sent to the plugins, with session and task as its parameters. Plugins are responsible for @@ -877,20 +828,37 @@ class TerminalImportSession(importer.ImportSession): Returns a list of `PromptChoice`s. """ + # Standard, built-in choices. + choices = [ + PromptChoice(u's', u'Skip', + lambda s, t: importer.action.SKIP), + PromptChoice(u'u', u'Use as-is', + lambda s, t: importer.action.ASIS) + ] + if task.is_album: + choices += [ + PromptChoice(u't', u'as Tracks', + lambda s, t: importer.action.TRACKS), + PromptChoice(u'g', u'Group albums', + lambda s, t: importer.action.ALBUMS), + ] + choices += [ + PromptChoice(u'e', u'Enter search', manual_search), + PromptChoice(u'i', u'enter Id', manual_id), + PromptChoice(u'b', u'aBort', abort_action), + ] + # Send the before_choose_candidate event and flatten list. 
extra_choices = list(chain(*plugins.send('before_choose_candidate', session=self, task=task))) - # Take into account default options, for duplicate checking. - all_choices = [PromptChoice(u'a', u'Apply', None), - PromptChoice(u's', u'Skip', None), - PromptChoice(u'u', u'Use as-is', None), - PromptChoice(u't', u'as Tracks', None), - PromptChoice(u'g', u'Group albums', None), - PromptChoice(u'e', u'Enter search', None), - PromptChoice(u'i', u'enter Id', None), - PromptChoice(u'b', u'aBort', None)] +\ - extra_choices + # Add a "dummy" choice for the other baked-in option, for + # duplicate checking. + all_choices = [ + PromptChoice(u'a', u'Apply', None), + ] + choices + extra_choices + + # Check for conflicts. short_letters = [c.short for c in all_choices] if len(short_letters) != len(set(short_letters)): # Duplicate short letter has been found. @@ -900,11 +868,12 @@ class TerminalImportSession(importer.ImportSession): # Keep the first of the choices, removing the rest. dup_choices = [c for c in all_choices if c.short == short] for c in dup_choices[1:]: - log.warn(u"Prompt choice '{0}' removed due to conflict " - u"with '{1}' (short letter: '{2}')", - c.long, dup_choices[0].long, c.short) + log.warning(u"Prompt choice '{0}' removed due to conflict " + u"with '{1}' (short letter: '{2}')", + c.long, dup_choices[0].long, c.short) extra_choices.remove(c) - return extra_choices + + return choices + extra_choices # The import command. @@ -964,6 +933,13 @@ def import_func(lib, opts, args): if not paths: raise ui.UserError(u'no path specified') + # On Python 2, we get filenames as raw bytes, which is what we + # need. On Python 3, we need to undo the "helpful" conversion to + # Unicode strings to get the real bytestring filename. 
+ if not six.PY2: + paths = [p.encode(util.arg_encoding(), 'surrogateescape') + for p in paths] + import_files(lib, paths, query) @@ -978,6 +954,10 @@ import_cmd.parser.add_option( u'-C', u'--nocopy', action='store_false', dest='copy', help=u"don't copy tracks (opposite of -c)" ) +import_cmd.parser.add_option( + u'-m', u'--move', action='store_true', dest='move', + help=u"move tracks into the library (overrides -c)" +) import_cmd.parser.add_option( u'-w', u'--write', action='store_true', default=None, help=u"write new metadata to files' tags (default)" @@ -1030,6 +1010,10 @@ import_cmd.parser.add_option( u'-I', u'--noincremental', dest='incremental', action='store_false', help=u'do not skip already-imported directories' ) +import_cmd.parser.add_option( + u'--from-scratch', dest='from_scratch', action='store_true', + help=u'erase existing metadata before applying new metadata' +) import_cmd.parser.add_option( u'--flat', dest='flat', action='store_true', help=u'import an entire tree as a single album' @@ -1044,16 +1028,22 @@ import_cmd.parser.add_option( ) import_cmd.parser.add_option( u'-S', u'--search-id', dest='search_ids', action='append', - metavar='BACKEND_ID', + metavar='ID', help=u'restrict matching to a specific metadata backend ID' ) +import_cmd.parser.add_option( + u'--set', dest='set_fields', action='callback', + callback=_store_dict, + metavar='FIELD=VALUE', + help=u'set the given fields to the supplied values' +) import_cmd.func = import_func default_commands.append(import_cmd) # list: Query and show library contents. -def list_items(lib, query, album, fmt=''): +def list_items(lib, query, album, fmt=u''): """Print out items in lib matching query. If album, then search for albums instead of single items. """ @@ -1079,11 +1069,18 @@ default_commands.append(list_cmd) # update: Update library contents according to on-disk tags. 
-def update_items(lib, query, album, move, pretend): +def update_items(lib, query, album, move, pretend, fields): """For all the items matched by the query, update the library to reflect the item's embedded tags. + :param fields: The fields to be stored. If not specified, all fields will + be. """ with lib.transaction(): + if move and fields is not None and 'path' not in fields: + # Special case: if an item needs to be moved, the path field has to + # updated; otherwise the new path will not be reflected in the + # database. + fields.append('path') items, _ = _do_query(lib, query, album) # Walk through the items and pick up their changes. @@ -1122,24 +1119,25 @@ def update_items(lib, query, album, move, pretend): item._dirty.discard(u'albumartist') # Check for and display changes. - changed = ui.show_model_changes(item, - fields=library.Item._media_fields) + changed = ui.show_model_changes( + item, + fields=fields or library.Item._media_fields) # Save changes. if not pretend: if changed: # Move the item if it's in the library. if move and lib.directory in ancestry(item.path): - item.move() + item.move(store=False) - item.store() + item.store(fields=fields) affected_albums.add(item.album_id) else: # The file's mtime was different, but there were no # changes to the metadata. Store the new mtime, # which is set in the call to read(), so we don't # check this again in the future. - item.store() + item.store(fields=fields) # Skip album changes while pretending. if pretend: @@ -1158,17 +1156,24 @@ def update_items(lib, query, album, move, pretend): # Update album structure to reflect an item in it. for key in library.Album.item_keys: album[key] = first_item[key] - album.store() + album.store(fields=fields) # Move album art (and any inconsistent items). if move and lib.directory in ancestry(first_item.path): log.debug(u'moving album {0}', album_id) - album.move() + + # Manually moving and storing the album. 
+ items = list(album.items()) + for item in items: + item.move(store=False) + item.store(fields=fields) + album.move(store=False) + album.store(fields=fields) def update_func(lib, opts, args): update_items(lib, decargs(args), opts.album, ui.should_move(opts.move), - opts.pretend) + opts.pretend, opts.fields) update_cmd = ui.Subcommand( @@ -1188,37 +1193,43 @@ update_cmd.parser.add_option( u'-p', u'--pretend', action='store_true', help=u"show all changes but do nothing" ) +update_cmd.parser.add_option( + u'-F', u'--field', default=None, action='append', dest='fields', + help=u'list of fields to update' +) update_cmd.func = update_func default_commands.append(update_cmd) # remove: Remove items from library, delete files. -def remove_items(lib, query, album, delete): +def remove_items(lib, query, album, delete, force): """Remove items matching query from lib. If album, then match and remove whole albums. If delete, also remove files from disk. """ # Get the matching items. items, albums = _do_query(lib, query, album) - # Prepare confirmation with user. - print_() - if delete: - fmt = u'$path - $title' - prompt = u'Really DELETE %i file%s (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') - else: - fmt = '' - prompt = u'Really remove %i item%s from the library (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') + # Confirm file removal if not forcing removal. + if not force: + # Prepare confirmation with user. + print_() + if delete: + fmt = u'$path - $title' + prompt = u'Really DELETE %i file%s (y/n)?' % \ + (len(items), 's' if len(items) > 1 else '') + else: + fmt = u'' + prompt = u'Really remove %i item%s from the library (y/n)?' % \ + (len(items), 's' if len(items) > 1 else '') - # Show all the items. - for item in items: - ui.print_(format(item, fmt)) + # Show all the items. + for item in items: + ui.print_(format(item, fmt)) - # Confirm with user. - if not ui.input_yn(prompt, True): - return + # Confirm with user. 
+ if not ui.input_yn(prompt, True): + return # Remove (and possibly delete) items. with lib.transaction(): @@ -1227,7 +1238,7 @@ def remove_items(lib, query, album, delete): def remove_func(lib, opts, args): - remove_items(lib, decargs(args), opts.album, opts.delete) + remove_items(lib, decargs(args), opts.album, opts.delete, opts.force) remove_cmd = ui.Subcommand( @@ -1237,6 +1248,10 @@ remove_cmd.parser.add_option( u"-d", u"--delete", action="store_true", help=u"also remove files from disk" ) +remove_cmd.parser.add_option( + u"-f", u"--force", action="store_true", + help=u"do not ask when removing items" +) remove_cmd.parser.add_album_option() remove_cmd.func = remove_func default_commands.append(remove_cmd) @@ -1310,6 +1325,7 @@ default_commands.append(stats_cmd) def show_version(lib, opts, args): print_(u'beets version %s' % beets.__version__) + print_(u'Python version {}'.format(python_version())) # Show plugins. names = sorted(p.name for p in plugins.find_plugins()) if names: @@ -1454,7 +1470,8 @@ default_commands.append(modify_cmd) # move: Move/copy files to the library or a new base directory. -def move_items(lib, dest, query, copy, album, pretend, confirm=False): +def move_items(lib, dest, query, copy, album, pretend, confirm=False, + export=False): """Moves or copies items to a new base directory, given by dest. If dest is None, then the library's base directory is used, making the command "consolidate" files. @@ -1467,6 +1484,7 @@ def move_items(lib, dest, query, copy, album, pretend, confirm=False): isalbummoved = lambda album: any(isitemmoved(i) for i in album.items()) objs = [o for o in objs if (isalbummoved if album else isitemmoved)(o)] + copy = copy or export # Exporting always copies. 
action = u'Copying' if copy else u'Moving' act = u'copy' if copy else u'move' entity = u'album' if album else u'item' @@ -1492,8 +1510,16 @@ def move_items(lib, dest, query, copy, album, pretend, confirm=False): for obj in objs: log.debug(u'moving: {0}', util.displayable_path(obj.path)) - obj.move(copy, basedir=dest) - obj.store() + if export: + # Copy without affecting the database. + obj.move(operation=MoveOperation.COPY, basedir=dest, + store=False) + else: + # Ordinary move/copy: store the new path. + if copy: + obj.move(operation=MoveOperation.COPY, basedir=dest) + else: + obj.move(operation=MoveOperation.MOVE, basedir=dest) def move_func(lib, opts, args): @@ -1504,7 +1530,7 @@ def move_func(lib, opts, args): raise ui.UserError(u'no such directory: %s' % dest) move_items(lib, dest, decargs(args), opts.copy, opts.album, opts.pretend, - opts.timid) + opts.timid, opts.export) move_cmd = ui.Subcommand( @@ -1526,6 +1552,10 @@ move_cmd.parser.add_option( u'-t', u'--timid', dest='timid', action='store_true', help=u'always confirm all actions' ) +move_cmd.parser.add_option( + u'-e', u'--export', default=False, action='store_true', + help=u'copy without changing the database path' +) move_cmd.parser.add_album_option() move_cmd.func = move_func default_commands.append(move_cmd) @@ -1601,7 +1631,7 @@ def config_func(lib, opts, args): filenames.insert(0, user_path) for filename in filenames: - print_(filename) + print_(displayable_path(filename)) # Open in editor. elif opts.edit: @@ -1609,7 +1639,8 @@ def config_func(lib, opts, args): # Dump configuration. 
else: - print_(config.dump(full=opts.defaults, redact=opts.redact)) + config_out = config.dump(full=opts.defaults, redact=opts.redact) + print_(util.text_string(config_out)) def config_edit(): @@ -1655,17 +1686,19 @@ default_commands.append(config_cmd) def print_completion(*args): for line in completion_script(default_commands + plugins.commands()): - print_(line, end='') + print_(line, end=u'') if not any(map(os.path.isfile, BASH_COMPLETION_PATHS)): - log.warn(u'Warning: Unable to find the bash-completion package. ' - u'Command line completion might not work.') + log.warning(u'Warning: Unable to find the bash-completion package. ' + u'Command line completion might not work.') BASH_COMPLETION_PATHS = map(syspath, [ u'/etc/bash_completion', u'/usr/share/bash-completion/bash_completion', - u'/usr/share/local/bash-completion/bash_completion', - u'/opt/local/share/bash-completion/bash_completion', # SmartOS - u'/usr/local/etc/bash_completion', # Homebrew + u'/usr/local/share/bash-completion/bash_completion', + # SmartOS + u'/opt/local/share/bash-completion/bash_completion', + # Homebrew (before bash-completion2) + u'/usr/local/etc/bash_completion', ]) @@ -1677,7 +1710,7 @@ def completion_script(commands): """ base_script = os.path.join(_package_path('beets.ui'), 'completion_base.sh') with open(base_script, 'r') as base_script: - yield base_script.read() + yield util.text_string(base_script.read()) options = {} aliases = {} @@ -1692,12 +1725,12 @@ def completion_script(commands): if re.match(r'^\w+$', alias): aliases[alias] = name - options[name] = {'flags': [], 'opts': []} + options[name] = {u'flags': [], u'opts': []} for opts in cmd.parser._get_all_options()[1:]: if opts.action in ('store_true', 'store_false'): - option_type = 'flags' + option_type = u'flags' else: - option_type = 'opts' + option_type = u'opts' options[name][option_type].extend( opts._short_opts + opts._long_opts @@ -1705,14 +1738,14 @@ def completion_script(commands): # Add global options 
options['_global'] = { - 'flags': [u'-v', u'--verbose'], - 'opts': u'-l --library -c --config -d --directory -h --help'.split( - u' ') + u'flags': [u'-v', u'--verbose'], + u'opts': + u'-l --library -c --config -d --directory -h --help'.split(u' ') } # Add flags common to all commands options['_common'] = { - 'flags': [u'-h', u'--help'] + u'flags': [u'-h', u'--help'] } # Start generating the script @@ -1725,21 +1758,24 @@ def completion_script(commands): # Command aliases yield u" local aliases='%s'\n" % ' '.join(aliases.keys()) for alias, cmd in aliases.items(): - yield u" local alias__%s=%s\n" % (alias, cmd) + yield u" local alias__%s=%s\n" % (alias.replace('-', '_'), cmd) yield u'\n' # Fields yield u" fields='%s'\n" % ' '.join( - set(library.Item._fields.keys() + library.Album._fields.keys()) + set( + list(library.Item._fields.keys()) + + list(library.Album._fields.keys()) + ) ) # Command options for cmd, opts in options.items(): for option_type, option_list in opts.items(): if option_list: - option_list = ' '.join(option_list) + option_list = u' '.join(option_list) yield u" local %s__%s='%s'\n" % ( - option_type, cmd, option_list) + option_type, cmd.replace('-', '_'), option_list) yield u' _beet_dispatch\n' yield u'}\n' diff --git a/libs/beets/ui/completion_base.sh b/libs/beets/ui/completion_base.sh index ce3fb6e2..1eaa4db3 100644 --- a/libs/beets/ui/completion_base.sh +++ b/libs/beets/ui/completion_base.sh @@ -70,7 +70,7 @@ _beet_dispatch() { # Replace command shortcuts if [[ -n $cmd ]] && _list_include_item "$aliases" "$cmd"; then - eval "cmd=\$alias__$cmd" + eval "cmd=\$alias__${cmd//-/_}" fi case $cmd in @@ -94,8 +94,8 @@ _beet_dispatch() { _beet_complete() { if [[ $cur == -* ]]; then local opts flags completions - eval "opts=\$opts__$cmd" - eval "flags=\$flags__$cmd" + eval "opts=\$opts__${cmd//-/_}" + eval "flags=\$flags__${cmd//-/_}" completions="${flags___common} ${opts} ${flags}" COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) else @@ -129,7 +129,7 
@@ _beet_complete_global() { COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) elif [[ -n $cur ]] && _list_include_item "$aliases" "$cur"; then local cmd - eval "cmd=\$alias__$cur" + eval "cmd=\$alias__${cur//-/_}" COMPREPLY+=( "$cmd" ) else COMPREPLY+=( $(compgen -W "$commands" -- $cur) ) @@ -138,7 +138,7 @@ _beet_complete_global() { _beet_complete_query() { local opts - eval "opts=\$opts__$cmd" + eval "opts=\$opts__${cmd//-/_}" if [[ $cur == -* ]] || _list_include_item "$opts" "$prev"; then _beet_complete diff --git a/libs/beets/util/__init__.py b/libs/beets/util/__init__.py index 3cc270ae..69870edf 100644 --- a/libs/beets/util/__init__.py +++ b/libs/beets/util/__init__.py @@ -18,6 +18,8 @@ from __future__ import division, absolute_import, print_function import os import sys +import errno +import locale import re import shutil import fnmatch @@ -27,10 +29,14 @@ import subprocess import platform import shlex from beets.util import hidden +import six +from unidecode import unidecode +from enum import Enum MAX_FILENAME_LENGTH = 200 WINDOWS_MAGIC_PREFIX = u'\\\\?\\' +SNI_SUPPORTED = sys.version_info >= (2, 7, 9) class HumanReadableException(Exception): @@ -65,14 +71,14 @@ class HumanReadableException(Exception): def _reasonstr(self): """Get the reason as a string.""" - if isinstance(self.reason, unicode): + if isinstance(self.reason, six.text_type): return self.reason - elif isinstance(self.reason, basestring): # Byte string. 
- return self.reason.decode('utf8', 'ignore') + elif isinstance(self.reason, bytes): + return self.reason.decode('utf-8', 'ignore') elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError return self.reason.strerror else: - return u'"{0}"'.format(unicode(self.reason)) + return u'"{0}"'.format(six.text_type(self.reason)) def get_message(self): """Create the human-readable description of the error, sans @@ -119,6 +125,15 @@ class FilesystemError(HumanReadableException): return u'{0} {1}'.format(self._reasonstr(), clause) +class MoveOperation(Enum): + """The file operations that e.g. various move functions can carry out. + """ + MOVE = 0 + COPY = 1 + LINK = 2 + HARDLINK = 3 + + def normpath(path): """Provide the canonical form of the path suitable for storing in the database. @@ -158,15 +173,16 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): pattern in `ignore` are skipped. If `logger` is provided, then warning messages are logged there when a directory cannot be listed. """ - # Make sure the path isn't a Unicode string. + # Make sure the pathes aren't Unicode strings. path = bytestring_path(path) + ignore = [bytestring_path(i) for i in ignore] # Get all the directories and files at this level. try: contents = os.listdir(syspath(path)) except OSError as exc: if logger: - logger.warn(u'could not list directory {0}: {1}'.format( + logger.warning(u'could not list directory {0}: {1}'.format( displayable_path(path), exc.strerror )) return @@ -264,7 +280,9 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): if not os.path.exists(directory): # Directory gone already. continue - if fnmatch_all(os.listdir(directory), clutter): + clutter = [bytestring_path(c) for c in clutter] + match_paths = [bytestring_path(d) for d in os.listdir(directory)] + if fnmatch_all(match_paths, clutter): # Directory contains only clutter (or nothing). 
try: shutil.rmtree(directory) @@ -298,6 +316,18 @@ def components(path): return comps +def arg_encoding(): + """Get the encoding for command-line arguments (and other OS + locale-sensitive strings). + """ + try: + return locale.getdefaultlocale()[1] or 'utf-8' + except ValueError: + # Invalid locale environment variable setting. To avoid + # failing entirely for no good reason, assume UTF-8. + return 'utf-8' + + def _fsencoding(): """Get the system's filesystem encoding. On Windows, this is always UTF-8 (not MBCS). @@ -309,7 +339,7 @@ def _fsencoding(): # for Windows paths, so the encoding is actually immaterial so # we can avoid dealing with this nastiness. We arbitrarily # choose UTF-8. - encoding = 'utf8' + encoding = 'utf-8' return encoding @@ -327,11 +357,14 @@ def bytestring_path(path): if os.path.__name__ == 'ntpath' and path.startswith(WINDOWS_MAGIC_PREFIX): path = path[len(WINDOWS_MAGIC_PREFIX):] - # Try to encode with default encodings, but fall back to UTF8. + # Try to encode with default encodings, but fall back to utf-8. try: return path.encode(_fsencoding()) except (UnicodeError, LookupError): - return path.encode('utf8') + return path.encode('utf-8') + + +PATH_SEP = bytestring_path(os.sep) def displayable_path(path, separator=u'; '): @@ -341,16 +374,16 @@ def displayable_path(path, separator=u'; '): """ if isinstance(path, (list, tuple)): return separator.join(displayable_path(p) for p in path) - elif isinstance(path, unicode): + elif isinstance(path, six.text_type): return path elif not isinstance(path, bytes): # A non-string object: just get its unicode representation. 
- return unicode(path) + return six.text_type(path) try: return path.decode(_fsencoding(), 'ignore') except (UnicodeError, LookupError): - return path.decode('utf8', 'ignore') + return path.decode('utf-8', 'ignore') def syspath(path, prefix=True): @@ -364,12 +397,12 @@ def syspath(path, prefix=True): if os.path.__name__ != 'ntpath': return path - if not isinstance(path, unicode): + if not isinstance(path, six.text_type): # Beets currently represents Windows paths internally with UTF-8 # arbitrarily. But earlier versions used MBCS because it is # reported as the FS encoding by Windows. Try both. try: - path = path.decode('utf8') + path = path.decode('utf-8') except UnicodeError: # The encoding should always be MBCS, Windows' broken # Unicode representation. @@ -389,6 +422,8 @@ def syspath(path, prefix=True): def samefile(p1, p2): """Safer equality for paths.""" + if p1 == p2: + return True return shutil._samefile(syspath(p1), syspath(p2)) @@ -437,8 +472,7 @@ def move(path, dest, replace=False): path = syspath(path) dest = syspath(dest) if os.path.exists(dest) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest), - traceback.format_exc()) + raise FilesystemError(u'file exists', 'rename', (path, dest)) # First, try renaming the file. try: @@ -456,23 +490,52 @@ def move(path, dest, replace=False): def link(path, dest, replace=False): """Create a symbolic link from path to `dest`. Raises an OSError if `dest` already exists, unless `replace` is True. Does nothing if - `path` == `dest`.""" - if (samefile(path, dest)): + `path` == `dest`. 
+ """ + if samefile(path, dest): return - path = syspath(path) - dest = syspath(dest) - if os.path.exists(dest) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest), - traceback.format_exc()) + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError(u'file exists', 'rename', (path, dest)) try: - os.symlink(path, dest) - except OSError: - raise FilesystemError(u'Operating system does not support symbolic ' - u'links.', 'link', (path, dest), + os.symlink(syspath(path), syspath(dest)) + except NotImplementedError: + # raised on python >= 3.2 and Windows versions before Vista + raise FilesystemError(u'OS does not support symbolic links.' + 'link', (path, dest), traceback.format_exc()) + except OSError as exc: + # TODO: Windows version checks can be removed for python 3 + if hasattr('sys', 'getwindowsversion'): + if sys.getwindowsversion()[0] < 6: # is before Vista + exc = u'OS does not support symbolic links.' + raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) +def hardlink(path, dest, replace=False): + """Create a hard link from path to `dest`. Raises an OSError if + `dest` already exists, unless `replace` is True. Does nothing if + `path` == `dest`. + """ + if samefile(path, dest): + return + + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError(u'file exists', 'rename', (path, dest)) + try: + os.link(syspath(path), syspath(dest)) + except NotImplementedError: + raise FilesystemError(u'OS does not support hard links.' + 'link', (path, dest), traceback.format_exc()) + except OSError as exc: + if exc.errno == errno.EXDEV: + raise FilesystemError(u'Cannot hard link across devices.' + 'link', (path, dest), traceback.format_exc()) + else: + raise FilesystemError(exc, 'link', (path, dest), + traceback.format_exc()) + + def unique_path(path): """Returns a version of ``path`` that does not exist on the filesystem. 
Specifically, if ``path` itself already exists, then @@ -490,7 +553,8 @@ def unique_path(path): num = 0 while True: num += 1 - new_path = b'%s.%i%s' % (base, num, ext) + suffix = u'.{}'.format(num).encode() + ext + new_path = base + suffix if not os.path.exists(new_path): return new_path @@ -594,7 +658,7 @@ def legalize_path(path, replacements, length, extension, fragment): if fragment: # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') + extension = extension.decode('utf-8', 'ignore') first_stage_path, _ = _legalize_stage( path, replacements, length, extension, fragment @@ -618,6 +682,24 @@ def legalize_path(path, replacements, length, extension, fragment): return second_stage_path, retruncated +def py3_path(path): + """Convert a bytestring path to Unicode on Python 3 only. On Python + 2, return the bytestring path unchanged. + + This helps deal with APIs on Python 3 that *only* accept Unicode + (i.e., `str` objects). I philosophically disagree with this + decision, because paths are sadly bytes on Unix, but that's the way + it is. So this function helps us "smuggle" the true bytes data + through APIs that took Python 3's Unicode mandate too seriously. + """ + if isinstance(path, six.text_type): + return path + assert isinstance(path, bytes) + if six.PY2: + return path + return os.fsdecode(path) + + def str2bool(value): """Returns a boolean reflecting a human-entered string.""" return value.lower() in (u'yes', u'1', u'true', u't', u'y') @@ -627,14 +709,32 @@ def as_string(value): """Convert a value to a Unicode object for matching with a query. None becomes the empty string. Bytestrings are silently decoded. 
""" + if six.PY2: + buffer_types = buffer, memoryview # noqa: F821 + else: + buffer_types = memoryview + if value is None: return u'' - elif isinstance(value, buffer): - return bytes(value).decode('utf8', 'ignore') + elif isinstance(value, buffer_types): + return bytes(value).decode('utf-8', 'ignore') elif isinstance(value, bytes): - return value.decode('utf8', 'ignore') + return value.decode('utf-8', 'ignore') else: - return unicode(value) + return six.text_type(value) + + +def text_string(value, encoding='utf-8'): + """Convert a string, which can either be bytes or unicode, to + unicode. + + Text (unicode) is left untouched; bytes are decoded. This is useful + to convert from a "native string" (bytes on Python 2, str on Python + 3) to a consistently unicode value. + """ + if isinstance(value, bytes): + return value.decode(encoding) + return value def plurality(objs): @@ -661,7 +761,7 @@ def cpu_count(): num = 0 elif sys.platform == 'darwin': try: - num = int(command_output([b'/usr/sbin/sysctl', b'-n', b'hw.ncpu'])) + num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu'])) except (ValueError, OSError, subprocess.CalledProcessError): num = 0 else: @@ -675,10 +775,28 @@ def cpu_count(): return 1 +def convert_command_args(args): + """Convert command arguments to bytestrings on Python 2 and + surrogate-escaped strings on Python 3.""" + assert isinstance(args, list) + + def convert(arg): + if six.PY2: + if isinstance(arg, six.text_type): + arg = arg.encode(arg_encoding()) + else: + if isinstance(arg, bytes): + arg = arg.decode(arg_encoding(), 'surrogateescape') + return arg + + return [convert(a) for a in args] + + def command_output(cmd, shell=False): """Runs the command and returns its output after it has exited. - ``cmd`` is a list of byte string arguments starting with the command names. + ``cmd`` is a list of arguments starting with the command names. The + arguments are bytes on Unix and strings on Windows. 
If ``shell`` is true, ``cmd`` is assumed to be a string and passed to a shell to execute. @@ -689,10 +807,18 @@ def command_output(cmd, shell=False): This replaces `subprocess.check_output` which can have problems if lots of output is sent to stderr. """ + cmd = convert_command_args(cmd) + + try: # python >= 3.3 + devnull = subprocess.DEVNULL + except AttributeError: + devnull = open(os.devnull, 'r+b') + proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + stdin=devnull, close_fds=platform.system() != 'Windows', shell=shell ) @@ -700,7 +826,7 @@ def command_output(cmd, shell=False): if proc.returncode: raise subprocess.CalledProcessError( returncode=proc.returncode, - cmd=b' '.join(cmd), + cmd=' '.join(cmd), output=stdout + stderr, ) return stdout @@ -756,15 +882,14 @@ def shlex_split(s): Raise `ValueError` if the string is not a well-formed shell string. This is a workaround for a bug in some versions of Python. """ - if isinstance(s, bytes): - # Shlex works fine. + if not six.PY2 or isinstance(s, bytes): # Shlex works fine. return shlex.split(s) - elif isinstance(s, unicode): + elif isinstance(s, six.text_type): # Work around a Python bug. # http://bugs.python.org/issue6988 - bs = s.encode('utf8') - return [c.decode('utf8') for c in shlex.split(bs)] + bs = s.encode('utf-8') + return [c.decode('utf-8') for c in shlex.split(bs)] else: raise TypeError(u'shlex_split called with non-string') @@ -796,8 +921,8 @@ def _windows_long_path_name(short_path): """Use Windows' `GetLongPathNameW` via ctypes to get the canonical, long path given a short filename. 
""" - if not isinstance(short_path, unicode): - short_path = unicode(short_path) + if not isinstance(short_path, six.text_type): + short_path = short_path.decode(_fsencoding()) import ctypes buf = ctypes.create_unicode_buffer(260) @@ -860,3 +985,27 @@ def raw_seconds_short(string): raise ValueError(u'String not in M:SS format') minutes, seconds = map(int, match.groups()) return float(minutes * 60 + seconds) + + +def asciify_path(path, sep_replace): + """Decodes all unicode characters in a path into ASCII equivalents. + + Substitutions are provided by the unidecode module. Path separators in the + input are preserved. + + Keyword arguments: + path -- The path to be asciified. + sep_replace -- the string to be used to replace extraneous path separators. + """ + # if this platform has an os.altsep, change it to os.sep. + if os.altsep: + path = path.replace(os.altsep, os.sep) + path_components = path.split(os.sep) + for index, item in enumerate(path_components): + path_components[index] = unidecode(item).replace(os.sep, sep_replace) + if os.altsep: + path_components[index] = unidecode(item).replace( + os.altsep, + sep_replace + ) + return os.sep.join(path_components) diff --git a/libs/beets/util/artresizer.py b/libs/beets/util/artresizer.py index 6970a7da..e5117a6a 100644 --- a/libs/beets/util/artresizer.py +++ b/libs/beets/util/artresizer.py @@ -18,21 +18,24 @@ public resizing proxy if neither is available. 
""" from __future__ import division, absolute_import, print_function -import urllib import subprocess import os import re from tempfile import NamedTemporaryFile - +from six.moves.urllib.parse import urlencode from beets import logging from beets import util +import six # Resizing methods PIL = 1 IMAGEMAGICK = 2 WEBPROXY = 3 -PROXY_URL = 'http://images.weserv.nl/' +if util.SNI_SUPPORTED: + PROXY_URL = 'https://images.weserv.nl/' +else: + PROXY_URL = 'http://images.weserv.nl/' log = logging.getLogger('beets') @@ -41,9 +44,9 @@ def resize_url(url, maxwidth): """Return a proxied image URL that resizes the original image to maxwidth (preserving aspect ratio). """ - return '{0}?{1}'.format(PROXY_URL, urllib.urlencode({ + return '{0}?{1}'.format(PROXY_URL, urlencode({ 'url': url.replace('http://', ''), - 'w': bytes(maxwidth), + 'w': maxwidth, })) @@ -52,8 +55,8 @@ def temp_file_for(path): specified path. """ ext = os.path.splitext(path)[1] - with NamedTemporaryFile(suffix=ext, delete=False) as f: - return f.name + with NamedTemporaryFile(suffix=util.py3_path(ext), delete=False) as f: + return util.bytestring_path(f.name) def pil_resize(maxwidth, path_in, path_out=None): @@ -85,19 +88,18 @@ def im_resize(maxwidth, path_in, path_out=None): log.debug(u'artresizer: ImageMagick resizing {0} to {1}', util.displayable_path(path_in), util.displayable_path(path_out)) - # "-resize widthxheight>" shrinks images with dimension(s) larger - # than the corresponding width and/or height dimension(s). The > - # "only shrink" flag is prefixed by ^ escape char for Windows - # compatibility. + # "-resize WIDTHx>" shrinks images with the width larger + # than the given width while maintaining the aspect ratio + # with regards to the height. 
try: util.command_output([ - b'convert', util.syspath(path_in, prefix=False), - b'-resize', b'{0}x^>'.format(maxwidth), + 'convert', util.syspath(path_in, prefix=False), + '-resize', '{0}x>'.format(maxwidth), util.syspath(path_out, prefix=False), ]) except subprocess.CalledProcessError: - log.warn(u'artresizer: IM convert failed for {0}', - util.displayable_path(path_in)) + log.warning(u'artresizer: IM convert failed for {0}', + util.displayable_path(path_in)) return path_in return path_out @@ -119,12 +121,12 @@ def pil_getsize(path_in): def im_getsize(path_in): - cmd = [b'identify', b'-format', b'%w %h', + cmd = ['identify', '-format', '%w %h', util.syspath(path_in, prefix=False)] try: out = util.command_output(cmd) except subprocess.CalledProcessError as exc: - log.warn(u'ImageMagick size query failed') + log.warning(u'ImageMagick size query failed') log.debug( u'`convert` exited with (status {}) when ' u'getting size with command {}:\n{}', @@ -134,7 +136,7 @@ def im_getsize(path_in): try: return tuple(map(int, out.split(b' '))) except IndexError: - log.warn(u'Could not understand IM output: {0!r}', out) + log.warning(u'Could not understand IM output: {0!r}', out) BACKEND_GET_SIZE = { @@ -149,21 +151,20 @@ class Shareable(type): lazily-created shared instance of ``MyClass`` while calling ``MyClass()`` to construct a new object works as usual. """ - def __init__(self, name, bases, dict): - super(Shareable, self).__init__(name, bases, dict) - self._instance = None + def __init__(cls, name, bases, dict): + super(Shareable, cls).__init__(name, bases, dict) + cls._instance = None @property - def shared(self): - if self._instance is None: - self._instance = self() - return self._instance + def shared(cls): + if cls._instance is None: + cls._instance = cls() + return cls._instance -class ArtResizer(object): +class ArtResizer(six.with_metaclass(Shareable, object)): """A singleton class that performs image resizes. 
""" - __metaclass__ = Shareable def __init__(self): """Create a resizer object with an inferred method. @@ -231,12 +232,13 @@ class ArtResizer(object): def get_im_version(): """Return Image Magick version or None if it is unavailable - Try invoking ImageMagick's "convert".""" + Try invoking ImageMagick's "convert". + """ try: - out = util.command_output([b'identify', b'--version']) + out = util.command_output(['convert', '--version']) - if 'imagemagick' in out.lower(): - pattern = r".+ (\d+)\.(\d+)\.(\d+).*" + if b'imagemagick' in out.lower(): + pattern = br".+ (\d+)\.(\d+)\.(\d+).*" match = re.search(pattern, out) if match: return (int(match.group(1)), @@ -244,7 +246,8 @@ def get_im_version(): int(match.group(3))) return (0,) - except (subprocess.CalledProcessError, OSError): + except (subprocess.CalledProcessError, OSError) as exc: + log.debug(u'ImageMagick check `convert --version` failed: {}', exc) return None diff --git a/libs/beets/util/bluelet.py b/libs/beets/util/bluelet.py index d81c2919..0da17559 100644 --- a/libs/beets/util/bluelet.py +++ b/libs/beets/util/bluelet.py @@ -9,6 +9,7 @@ Bluelet: easy concurrency without all the messy parallelism. """ from __future__ import division, absolute_import, print_function +import six import socket import select import sys @@ -19,20 +20,6 @@ import time import collections -# A little bit of "six" (Python 2/3 compatibility): cope with PEP 3109 syntax -# changes. - -PY3 = sys.version_info[0] == 3 -if PY3: - def _reraise(typ, exc, tb): - raise exc.with_traceback(tb) -else: - exec(""" -def _reraise(typ, exc, tb): - raise typ, exc, tb -""") - - # Basic events used for thread scheduling. class Event(object): @@ -214,7 +201,7 @@ class ThreadException(Exception): self.exc_info = exc_info def reraise(self): - _reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + six.reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) SUSPENDED = Event() # Special sentinel placeholder for suspended threads. 
@@ -282,7 +269,7 @@ def run(root_coro): except StopIteration: # Thread is done. complete_thread(coro, None) - except: + except BaseException: # Thread raised some other exception. del threads[coro] raise ThreadException(coro, sys.exc_info()) @@ -379,7 +366,7 @@ def run(root_coro): exit_te = te break - except: + except BaseException: # For instance, KeyboardInterrupt during select(). Raise # into root thread and terminate others. threads = {root_coro: ExceptionEvent(sys.exc_info())} diff --git a/libs/beets/util/confit.py b/libs/beets/util/confit.py index aa49f6f1..b5513f48 100644 --- a/libs/beets/util/confit.py +++ b/libs/beets/util/confit.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# This file is part of Confit. +# This file is part of Confuse. # Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining @@ -24,10 +24,7 @@ import sys import yaml import collections import re -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict UNIX_DIR_VAR = 'XDG_CONFIG_HOME' UNIX_DIR_FALLBACK = '~/.config' @@ -47,9 +44,9 @@ REDACTED_TOMBSTONE = 'REDACTED' # Utilities. PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode -BASESTRING = str if PY3 else basestring -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) +STRING = str if PY3 else unicode # noqa: F821 +BASESTRING = str if PY3 else basestring # noqa: F821 +NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) # noqa: F821 def iter_first(sequence): @@ -248,10 +245,15 @@ class ConfigView(object): def set_args(self, namespace): """Overlay parsed command-line arguments, generated by a library - like argparse or optparse, onto this view's value. + like argparse or optparse, onto this view's value. ``namespace`` + can be a ``dict`` or namespace object. 
""" args = {} - for key, value in namespace.__dict__.items(): + if isinstance(namespace, dict): + items = namespace.items() + else: + items = namespace.__dict__.items() + for key, value in items: if value is not None: # Avoid unset options. args[key] = value self.set(args) @@ -386,19 +388,42 @@ class ConfigView(object): """ return as_template(template).value(self, template) - # Old validation methods (deprecated). + # Shortcuts for common templates. def as_filename(self): + """Get the value as a path. Equivalent to `get(Filename())`. + """ return self.get(Filename()) def as_choice(self, choices): + """Get the value from a list of choices. Equivalent to + `get(Choice(choices))`. + """ return self.get(Choice(choices)) def as_number(self): + """Get the value as any number type: int or float. Equivalent to + `get(Number())`. + """ return self.get(Number()) - def as_str_seq(self): - return self.get(StrSeq()) + def as_str_seq(self, split=True): + """Get the value as a sequence of strings. Equivalent to + `get(StrSeq())`. + """ + return self.get(StrSeq(split=split)) + + def as_pairs(self, default_value=None): + """Get the value as a sequence of pairs of two strings. Equivalent to + `get(Pairs())`. + """ + return self.get(Pairs(default_value=default_value)) + + def as_str(self): + """Get the value as a (Unicode) string. Equivalent to + `get(unicode)` on Python 2 and `get(str)` on Python 3. + """ + return self.get(String()) # Redaction. @@ -484,11 +509,10 @@ class Subview(ConfigView): self.name += '.' if isinstance(self.key, int): self.name += u'#{0}'.format(self.key) - elif isinstance(self.key, BASESTRING): - if isinstance(self.key, bytes): - self.name += self.key.decode('utf8') - else: - self.name += self.key + elif isinstance(self.key, bytes): + self.name += self.key.decode('utf-8') + elif isinstance(self.key, STRING): + self.name += self.key else: self.name += repr(self.key) @@ -650,7 +674,7 @@ def load_yaml(filename): parsed, a ConfigReadError is raised. 
""" try: - with open(filename, 'r') as f: + with open(filename, 'rb') as f: return yaml.load(f, Loader=Loader) except (IOError, yaml.error.YAMLError) as exc: raise ConfigReadError(filename, exc) @@ -890,9 +914,10 @@ class Configuration(RootView): default_source = source break if default_source and default_source.filename: - with open(default_source.filename, 'r') as fp: + with open(default_source.filename, 'rb') as fp: default_data = fp.read() - yaml_out = restore_yaml_comments(yaml_out, default_data) + yaml_out = restore_yaml_comments(yaml_out, + default_data.decode('utf8')) return yaml_out @@ -953,7 +978,7 @@ should be raised when the value is missing. class Template(object): """A value template for configuration fields. - The template works like a type and instructs Confit about how to + The template works like a type and instructs Confuse about how to interpret a deserialized YAML value. This includes type conversions, providing a default value, and validating for errors. For example, a filepath type might expand tildes and check that the file exists. @@ -1223,30 +1248,77 @@ class StrSeq(Template): super(StrSeq, self).__init__() self.split = split + def _convert_value(self, x, view): + if isinstance(x, STRING): + return x + elif isinstance(x, bytes): + return x.decode('utf-8', 'ignore') + else: + self.fail(u'must be a list of strings', view, True) + def convert(self, value, view): if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') if isinstance(value, STRING): if self.split: - return value.split() + value = value.split() else: - return [value] + value = [value] + else: + try: + value = list(value) + except TypeError: + self.fail(u'must be a whitespace-separated string or a list', + view, True) + return [self._convert_value(v, view) for v in value] + + +class Pairs(StrSeq): + """A template for ordered key-value pairs. + + This can either be given with the same syntax as for `StrSeq` (i.e. 
without + values), or as a list of strings and/or single-element mappings such as:: + + - key: value + - [key, value] + - key + + The result is a list of two-element tuples. If no value is provided, the + `default_value` will be returned as the second element. + """ + + def __init__(self, default_value=None): + """Create a new template. + + `default` is the dictionary value returned for items that are not + a mapping, but a single string. + """ + super(Pairs, self).__init__(split=True) + self.default_value = default_value + + def _convert_value(self, x, view): try: - value = list(value) - except TypeError: - self.fail(u'must be a whitespace-separated string or a list', - view, True) - - def convert(x): - if isinstance(x, STRING): - return x - elif isinstance(x, bytes): - return x.decode('utf8', 'ignore') + return (super(Pairs, self)._convert_value(x, view), + self.default_value) + except ConfigTypeError: + if isinstance(x, collections.Mapping): + if len(x) != 1: + self.fail(u'must be a single-element mapping', view, True) + k, v = iter_first(x.items()) + elif isinstance(x, collections.Sequence): + if len(x) != 2: + self.fail(u'must be a two-element list', view, True) + k, v = x else: - self.fail(u'must be a list of strings', view, True) - return list(map(convert, value)) + # Is this even possible? -> Likely, if some !directive cause + # YAML to parse this to some custom type. 
+ self.fail(u'must be a single string, mapping, or a list' + u'' + str(x), + view, True) + return (super(Pairs, self)._convert_value(k, view), + super(Pairs, self)._convert_value(v, view)) class Filename(Template): diff --git a/libs/beets/util/functemplate.py b/libs/beets/util/functemplate.py index 05f0892c..0e13db4a 100644 --- a/libs/beets/util/functemplate.py +++ b/libs/beets/util/functemplate.py @@ -33,8 +33,8 @@ import re import ast import dis import types - -from .confit import NUMERIC_TYPES +import sys +import six SYMBOL_DELIM = u'$' FUNC_DELIM = u'%' @@ -74,11 +74,11 @@ def ex_literal(val): """ if val is None: return ast.Name('None', ast.Load()) - elif isinstance(val, NUMERIC_TYPES): + elif isinstance(val, six.integer_types): return ast.Num(val) elif isinstance(val, bool): return ast.Name(bytes(val), ast.Load()) - elif isinstance(val, basestring): + elif isinstance(val, six.string_types): return ast.Str(val) raise TypeError(u'no literal for {0}'.format(type(val))) @@ -97,7 +97,7 @@ def ex_call(func, args): function may be an expression or the name of a function. Each argument may be an expression or a value to be used as a literal. """ - if isinstance(func, basestring): + if isinstance(func, six.string_types): func = ex_rvalue(func) args = list(args) @@ -105,7 +105,10 @@ def ex_call(func, args): if not isinstance(args[i], ast.expr): args[i] = ex_literal(args[i]) - return ast.Call(func, args, [], None, None) + if sys.version_info[:2] < (3, 5): + return ast.Call(func, args, [], None, None) + else: + return ast.Call(func, args, []) def compile_func(arg_names, statements, name='_the_func', debug=False): @@ -113,16 +116,31 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): the resulting Python function. If `debug`, then print out the bytecode of the compiled function. 
""" - func_def = ast.FunctionDef( - name.encode('utf8'), - ast.arguments( - [ast.Name(n, ast.Param()) for n in arg_names], - None, None, - [ex_literal(None) for _ in arg_names], - ), - statements, - [], - ) + if six.PY2: + func_def = ast.FunctionDef( + name=name.encode('utf-8'), + args=ast.arguments( + args=[ast.Name(n, ast.Param()) for n in arg_names], + vararg=None, + kwarg=None, + defaults=[ex_literal(None) for _ in arg_names], + ), + body=statements, + decorator_list=[], + ) + else: + func_def = ast.FunctionDef( + name=name, + args=ast.arguments( + args=[ast.arg(arg=n, annotation=None) for n in arg_names], + kwonlyargs=[], + kw_defaults=[], + defaults=[ex_literal(None) for _ in arg_names], + ), + body=statements, + decorator_list=[], + ) + mod = ast.Module([func_def]) ast.fix_missing_locations(mod) @@ -164,8 +182,12 @@ class Symbol(object): def translate(self): """Compile the variable lookup.""" - expr = ex_rvalue(VARIABLE_PREFIX + self.ident.encode('utf8')) - return [expr], set([self.ident.encode('utf8')]), set() + if six.PY2: + ident = self.ident.encode('utf-8') + else: + ident = self.ident + expr = ex_rvalue(VARIABLE_PREFIX + ident) + return [expr], set([ident]), set() class Call(object): @@ -190,15 +212,19 @@ class Call(object): except Exception as exc: # Function raised exception! Maybe inlining the name of # the exception will help debug. 
- return u'<%s>' % unicode(exc) - return unicode(out) + return u'<%s>' % six.text_type(exc) + return six.text_type(out) else: return self.original def translate(self): """Compile the function call.""" varnames = set() - funcnames = set([self.ident.encode('utf8')]) + if six.PY2: + ident = self.ident.encode('utf-8') + else: + ident = self.ident + funcnames = set([ident]) arg_exprs = [] for arg in self.args: @@ -213,14 +239,14 @@ class Call(object): [ex_call( 'map', [ - ex_rvalue('unicode'), + ex_rvalue(six.text_type.__name__), ast.List(subexprs, ast.Load()), ] )], )) subexpr_call = ex_call( - FUNCTION_PREFIX + self.ident.encode('utf8'), + FUNCTION_PREFIX + ident, arg_exprs ) return [subexpr_call], varnames, funcnames @@ -242,11 +268,11 @@ class Expression(object): """ out = [] for part in self.parts: - if isinstance(part, basestring): + if isinstance(part, six.string_types): out.append(part) else: out.append(part.evaluate(env)) - return u''.join(map(unicode, out)) + return u''.join(map(six.text_type, out)) def translate(self): """Compile the expression to a list of Python AST expressions, a @@ -256,7 +282,7 @@ class Expression(object): varnames = set() funcnames = set() for part in self.parts: - if isinstance(part, basestring): + if isinstance(part, six.string_types): expressions.append(ex_literal(part)) else: e, v, f = part.translate() @@ -285,16 +311,24 @@ class Parser(object): replaced with a real, accepted parsing technique (PEG, parser generator, etc.). """ - def __init__(self, string): + def __init__(self, string, in_argument=False): + """ Create a new parser. + :param in_arguments: boolean that indicates the parser is to be + used for parsing function arguments, ie. considering commas + (`ARG_SEP`) a special character + """ self.string = string + self.in_argument = in_argument self.pos = 0 self.parts = [] # Common parsing resources. 
special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, - ARG_SEP, ESCAPE_CHAR) - special_char_re = re.compile(r'[%s]|$' % + ESCAPE_CHAR) + special_char_re = re.compile(r'[%s]|\Z' % u''.join(re.escape(c) for c in special_chars)) + escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) + terminator_chars = (GROUP_CLOSE,) def parse_expression(self): """Parse a template expression starting at ``pos``. Resulting @@ -302,16 +336,29 @@ class Parser(object): the ``parts`` field, a list. The ``pos`` field is updated to be the next character after the expression. """ + # Append comma (ARG_SEP) to the list of special characters only when + # parsing function arguments. + extra_special_chars = () + special_char_re = self.special_char_re + if self.in_argument: + extra_special_chars = (ARG_SEP,) + special_char_re = re.compile( + r'[%s]|\Z' % u''.join( + re.escape(c) for c in + self.special_chars + extra_special_chars + ) + ) + text_parts = [] while self.pos < len(self.string): char = self.string[self.pos] - if char not in self.special_chars: + if char not in self.special_chars + extra_special_chars: # A non-special character. Skip to the next special # character, treating the interstice as literal text. next_pos = ( - self.special_char_re.search( + special_char_re.search( self.string[self.pos:]).start() + self.pos ) text_parts.append(self.string[self.pos:next_pos]) @@ -322,14 +369,14 @@ class Parser(object): # The last character can never begin a structure, so we # just interpret it as a literal character (unless it # terminates the expression, as with , and }). 
- if char not in (GROUP_CLOSE, ARG_SEP): + if char not in self.terminator_chars + extra_special_chars: text_parts.append(char) self.pos += 1 break next_char = self.string[self.pos + 1] - if char == ESCAPE_CHAR and next_char in \ - (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): + if char == ESCAPE_CHAR and next_char in (self.escapable_chars + + extra_special_chars): # An escaped special character ($$, $}, etc.). Note that # ${ is not an escape sequence: this is ambiguous with # the start of a symbol and it's not necessary (just @@ -349,7 +396,7 @@ class Parser(object): elif char == FUNC_DELIM: # Parse a function call. self.parse_call() - elif char in (GROUP_CLOSE, ARG_SEP): + elif char in self.terminator_chars + extra_special_chars: # Template terminated. break elif char == GROUP_OPEN: @@ -457,7 +504,7 @@ class Parser(object): expressions = [] while self.pos < len(self.string): - subparser = Parser(self.string[self.pos:]) + subparser = Parser(self.string[self.pos:], in_argument=True) subparser.parse_expression() # Extract and advance past the parsed expression. @@ -526,8 +573,9 @@ class Template(object): """ try: res = self.compiled(values, functions) - except: # Handle any exceptions thrown by compiled version. + except Exception: # Handle any exceptions thrown by compiled version. 
res = self.interpret(values, functions) + return res def translate(self): @@ -563,7 +611,7 @@ if __name__ == '__main__': import timeit _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') _vars = {'bar': 'qux'} - _funcs = {'baz': unicode.upper} + _funcs = {'baz': six.text_type.upper} interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', 'from __main__ import _tmpl, _vars, _funcs', number=10000) diff --git a/libs/beets/util/hidden.py b/libs/beets/util/hidden.py index 262d371e..ed97f2bf 100644 --- a/libs/beets/util/hidden.py +++ b/libs/beets/util/hidden.py @@ -20,6 +20,7 @@ import os import stat import ctypes import sys +import beets.util def _is_hidden_osx(path): @@ -27,7 +28,7 @@ def _is_hidden_osx(path): This uses os.lstat to work out if a file has the "hidden" flag. """ - file_stat = os.lstat(path) + file_stat = os.lstat(beets.util.syspath(path)) if hasattr(file_stat, 'st_flags') and hasattr(stat, 'UF_HIDDEN'): return bool(file_stat.st_flags & stat.UF_HIDDEN) @@ -45,7 +46,7 @@ def _is_hidden_win(path): hidden_mask = 2 # Retrieve the attributes for the file. - attrs = ctypes.windll.kernel32.GetFileAttributesW(path) + attrs = ctypes.windll.kernel32.GetFileAttributesW(beets.util.syspath(path)) # Ensure we have valid attribues and compare them against the mask. return attrs >= 0 and attrs & hidden_mask @@ -56,11 +57,12 @@ def _is_hidden_dot(path): Files starting with a dot are seen as "hidden" files on Unix-based OSes. """ - return os.path.basename(path).startswith('.') + return os.path.basename(path).startswith(b'.') def is_hidden(path): - """Return whether or not a file is hidden. + """Return whether or not a file is hidden. `path` should be a + bytestring filename. This method works differently depending on the platform it is called on. @@ -73,10 +75,6 @@ def is_hidden(path): On any other operating systems (i.e. Linux), it uses `is_hidden_dot` to work out if a file is hidden. """ - # Convert the path to unicode if it is not already. 
- if not isinstance(path, unicode): - path = path.decode('utf-8') - # Run platform specific functions depending on the platform if sys.platform == 'darwin': return _is_hidden_osx(path) or _is_hidden_dot(path) diff --git a/libs/beets/util/pipeline.py b/libs/beets/util/pipeline.py index b5f77733..39bc7152 100644 --- a/libs/beets/util/pipeline.py +++ b/libs/beets/util/pipeline.py @@ -34,9 +34,10 @@ in place of any single coroutine. from __future__ import division, absolute_import, print_function -import Queue +from six.moves import queue from threading import Thread, Lock import sys +import six BUBBLE = '__PIPELINE_BUBBLE__' POISON = '__PIPELINE_POISON__' @@ -63,7 +64,17 @@ def _invalidate_queue(q, val=None, sync=True): q.mutex.acquire() try: - q.maxsize = 0 + # Originally, we set `maxsize` to 0 here, which is supposed to mean + # an unlimited queue size. However, there is a race condition since + # Python 3.2 when this attribute is changed while another thread is + # waiting in put()/get() due to a full/empty queue. + # Setting it to 2 is still hacky because Python does not give any + # guarantee what happens if Queue methods/attributes are overwritten + # when it is already in use. However, because of our dummy _put() + # and _get() methods, it provides a workaround to let the queue appear + # to be never empty or full. + # See issue https://github.com/beetbox/beets/issues/2078 + q.maxsize = 2 q._qsize = _qsize q._put = _put q._get = _get @@ -75,13 +86,13 @@ def _invalidate_queue(q, val=None, sync=True): q.mutex.release() -class CountedQueue(Queue.Queue): +class CountedQueue(queue.Queue): """A queue that keeps track of the number of threads that are still feeding into it. The queue is poisoned when all threads are finished with the queue. 
""" def __init__(self, maxsize=0): - Queue.Queue.__init__(self, maxsize) + queue.Queue.__init__(self, maxsize) self.nthreads = 0 self.poisoned = False @@ -259,7 +270,7 @@ class FirstPipelineThread(PipelineThread): return self.out_queue.put(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return @@ -307,7 +318,7 @@ class MiddlePipelineThread(PipelineThread): return self.out_queue.put(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return @@ -346,7 +357,7 @@ class LastPipelineThread(PipelineThread): # Send to consumer. self.coro.send(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return @@ -411,10 +422,10 @@ class Pipeline(object): try: # Using a timeout allows us to receive KeyboardInterrupt # exceptions during the join(). - while threads[-1].isAlive(): + while threads[-1].is_alive(): threads[-1].join(1) - except: + except BaseException: # Stop all the threads immediately. for thread in threads: thread.abort() @@ -431,7 +442,7 @@ class Pipeline(object): exc_info = thread.exc_info if exc_info: # Make the exception appear as it was raised originally. - raise exc_info[0], exc_info[1], exc_info[2] + six.reraise(exc_info[0], exc_info[1], exc_info[2]) def pull(self): """Yield elements from the end of the pipeline. Runs the stages diff --git a/libs/beetsplug/absubmit.py b/libs/beetsplug/absubmit.py new file mode 100644 index 00000000..0c288b9d --- /dev/null +++ b/libs/beetsplug/absubmit.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Pieter Mulder. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Calculate acoustic information and submit to AcousticBrainz. +""" + +from __future__ import division, absolute_import, print_function + +import errno +import hashlib +import json +import os +import subprocess +import tempfile + +from distutils.spawn import find_executable +import requests + +from beets import plugins +from beets import util +from beets import ui + + +class ABSubmitError(Exception): + """Raised when failing to analyse file with extractor.""" + + +def call(args): + """Execute the command and return its output. + + Raise a AnalysisABSubmitError on failure. + """ + try: + return util.command_output(args) + except subprocess.CalledProcessError as e: + raise ABSubmitError( + u'{0} exited with status {1}'.format(args[0], e.returncode) + ) + + +class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): + + def __init__(self): + super(AcousticBrainzSubmitPlugin, self).__init__() + + self.config.add({'extractor': u''}) + + self.extractor = self.config['extractor'].as_str() + if self.extractor: + self.extractor = util.normpath(self.extractor) + # Expicit path to extractor + if not os.path.isfile(self.extractor): + raise ui.UserError( + u'Extractor command does not exist: {0}.'. 
+ format(self.extractor) + ) + else: + # Implicit path to extractor, search for it in path + self.extractor = 'streaming_extractor_music' + try: + call([self.extractor]) + except OSError: + raise ui.UserError( + u'No extractor command found: please install the ' + u'extractor binary from http://acousticbrainz.org/download' + ) + except ABSubmitError: + # Extractor found, will exit with an error if not called with + # the correct amount of arguments. + pass + + # Get the executable location on the system, which we need + # to calculate the SHA-1 hash. + self.extractor = find_executable(self.extractor) + + # Calculate extractor hash. + self.extractor_sha = hashlib.sha1() + with open(self.extractor, 'rb') as extractor: + self.extractor_sha.update(extractor.read()) + self.extractor_sha = self.extractor_sha.hexdigest() + + base_url = 'https://acousticbrainz.org/api/v1/{mbid}/low-level' + + def commands(self): + cmd = ui.Subcommand( + 'absubmit', + help=u'calculate and submit AcousticBrainz analysis' + ) + cmd.func = self.command + return [cmd] + + def command(self, lib, opts, args): + # Get items from arguments + items = lib.items(ui.decargs(args)) + for item in items: + analysis = self._get_analysis(item) + if analysis: + self._submit_data(item, analysis) + + def _get_analysis(self, item): + mbid = item['mb_trackid'] + # If file has no mbid skip it. + if not mbid: + self._log.info(u'Not analysing {}, missing ' + u'musicbrainz track id.', item) + return None + + # Temporary file to save extractor output to, extractor only works + # if an output file is given. Here we use a temporary file to copy + # the data into a python object and then remove the file from the + # system. + tmp_file, filename = tempfile.mkstemp(suffix='.json') + try: + # Close the file, so the extractor can overwrite it. 
+ os.close(tmp_file) + try: + call([self.extractor, util.syspath(item.path), filename]) + except ABSubmitError as e: + self._log.warning( + u'Failed to analyse {item} for AcousticBrainz: {error}', + item=item, error=e + ) + return None + with open(filename, 'rb') as tmp_file: + analysis = json.load(tmp_file) + # Add the hash to the output. + analysis['metadata']['version']['essentia_build_sha'] = \ + self.extractor_sha + return analysis + finally: + try: + os.remove(filename) + except OSError as e: + # ENOENT means file does not exist, just ignore this error. + if e.errno != errno.ENOENT: + raise + + def _submit_data(self, item, data): + mbid = item['mb_trackid'] + headers = {'Content-Type': 'application/json'} + response = requests.post(self.base_url.format(mbid=mbid), + json=data, headers=headers) + # Test that request was successful and raise an error on failure. + if response.status_code != 200: + try: + message = response.json()['message'] + except (ValueError, KeyError) as e: + message = u'unable to get error message: {}'.format(e) + self._log.error( + u'Failed to submit AcousticBrainz analysis of {item}: ' + u'{message}).', item=item, message=message + ) + else: + self._log.debug(u'Successfully submitted AcousticBrainz analysis ' + u'for {}.', item) diff --git a/libs/beetsplug/acousticbrainz.py b/libs/beetsplug/acousticbrainz.py index df790b26..f4960c30 100644 --- a/libs/beetsplug/acousticbrainz.py +++ b/libs/beetsplug/acousticbrainz.py @@ -18,20 +18,101 @@ from __future__ import division, absolute_import, print_function import requests -import operator +from collections import defaultdict from beets import plugins, ui -from functools import reduce ACOUSTIC_BASE = "https://acousticbrainz.org/" LEVELS = ["/low-level", "/high-level"] +ABSCHEME = { + 'highlevel': { + 'danceability': { + 'all': { + 'danceable': 'danceable' + } + }, + 'gender': { + 'value': 'gender' + }, + 'genre_rosamerica': { + 'value': 'genre_rosamerica' + }, + 'mood_acoustic': { + 'all': { + 
'acoustic': 'mood_acoustic' + } + }, + 'mood_aggressive': { + 'all': { + 'aggressive': 'mood_aggressive' + } + }, + 'mood_electronic': { + 'all': { + 'electronic': 'mood_electronic' + } + }, + 'mood_happy': { + 'all': { + 'happy': 'mood_happy' + } + }, + 'mood_party': { + 'all': { + 'party': 'mood_party' + } + }, + 'mood_relaxed': { + 'all': { + 'relaxed': 'mood_relaxed' + } + }, + 'mood_sad': { + 'all': { + 'sad': 'mood_sad' + } + }, + 'ismir04_rhythm': { + 'value': 'rhythm' + }, + 'tonal_atonal': { + 'all': { + 'tonal': 'tonal' + } + }, + 'voice_instrumental': { + 'value': 'voice_instrumental' + }, + }, + 'lowlevel': { + 'average_loudness': 'average_loudness' + }, + 'rhythm': { + 'bpm': 'bpm' + }, + 'tonal': { + 'chords_changes_rate': 'chords_changes_rate', + 'chords_key': 'chords_key', + 'chords_number_rate': 'chords_number_rate', + 'chords_scale': 'chords_scale', + 'key_key': ('initial_key', 0), + 'key_scale': ('initial_key', 1), + 'key_strength': 'key_strength' + + } +} class AcousticPlugin(plugins.BeetsPlugin): def __init__(self): super(AcousticPlugin, self).__init__() - self.config.add({'auto': True}) + self.config.add({ + 'auto': True, + 'force': False, + 'tags': [] + }) + if self.config['auto']: self.register_listener('import_task_files', self.import_task_files) @@ -39,10 +120,16 @@ class AcousticPlugin(plugins.BeetsPlugin): def commands(self): cmd = ui.Subcommand('acousticbrainz', help=u"fetch metadata from AcousticBrainz") + cmd.parser.add_option( + u'-f', u'--force', dest='force_refetch', + action='store_true', default=False, + help=u're-download data when already present' + ) def func(lib, opts, args): items = lib.items(ui.decargs(args)) - fetch_info(self._log, items, ui.should_write()) + self._fetch_info(items, ui.should_write(), + opts.force_refetch or self.config['force']) cmd.func = func return [cmd] @@ -50,116 +137,169 @@ class AcousticPlugin(plugins.BeetsPlugin): def import_task_files(self, session, task): """Function is called upon beet import. 
""" + self._fetch_info(task.imported_items(), False, True) - items = task.imported_items() - fetch_info(self._log, items, False) + def _get_data(self, mbid): + data = {} + for url in _generate_urls(mbid): + self._log.debug(u'fetching URL: {}', url) - -def fetch_info(log, items, write): - """Get data from AcousticBrainz for the items. - """ - - def get_value(*map_path): - try: - return reduce(operator.getitem, map_path, data) - except KeyError: - log.debug(u'Invalid Path: {}', map_path) - - for item in items: - if item.mb_trackid: - log.info(u'getting data for: {}', item) - - # Fetch the data from the AB API. - urls = [generate_url(item.mb_trackid, path) for path in LEVELS] - log.debug(u'fetching URLs: {}', ' '.join(urls)) try: - res = [requests.get(url) for url in urls] + res = requests.get(url) except requests.RequestException as exc: - log.info(u'request error: {}', exc) - continue + self._log.info(u'request error: {}', exc) + return {} - # Check for missing tracks. - if any(r.status_code == 404 for r in res): - log.info(u'recording ID {} not found', item.mb_trackid) - continue + if res.status_code == 404: + self._log.info(u'recording ID {} not found', mbid) + return {} - # Parse the JSON response. try: - data = res[0].json() - data.update(res[1].json()) + data.update(res.json()) except ValueError: - log.debug(u'Invalid Response: {} & {}', [r.text for r in res]) + self._log.debug(u'Invalid Response: {}', res.text) + return {} - # Get each field and assign it on the item. 
- item.danceable = get_value( - "highlevel", "danceability", "all", "danceable", - ) - item.gender = get_value( - "highlevel", "gender", "value", - ) - item.genre_rosamerica = get_value( - "highlevel", "genre_rosamerica", "value" - ) - item.mood_acoustic = get_value( - "highlevel", "mood_acoustic", "all", "acoustic" - ) - item.mood_aggressive = get_value( - "highlevel", "mood_aggressive", "all", "aggressive" - ) - item.mood_electronic = get_value( - "highlevel", "mood_electronic", "all", "electronic" - ) - item.mood_happy = get_value( - "highlevel", "mood_happy", "all", "happy" - ) - item.mood_party = get_value( - "highlevel", "mood_party", "all", "party" - ) - item.mood_relaxed = get_value( - "highlevel", "mood_relaxed", "all", "relaxed" - ) - item.mood_sad = get_value( - "highlevel", "mood_sad", "all", "sad" - ) - item.rhythm = get_value( - "highlevel", "ismir04_rhythm", "value" - ) - item.tonal = get_value( - "highlevel", "tonal_atonal", "all", "tonal" - ) - item.voice_instrumental = get_value( - "highlevel", "voice_instrumental", "value" - ) - item.average_loudness = get_value( - "lowlevel", "average_loudness" - ) - item.chords_changes_rate = get_value( - "tonal", "chords_changes_rate" - ) - item.chords_key = get_value( - "tonal", "chords_key" - ) - item.chords_number_rate = get_value( - "tonal", "chords_number_rate" - ) - item.chords_scale = get_value( - "tonal", "chords_scale" - ) - item.initial_key = '{} {}'.format( - get_value("tonal", "key_key"), - get_value("tonal", "key_scale") - ) - item.key_strength = get_value( - "tonal", "key_strength" - ) + return data - # Store the data. - item.store() - if write: - item.try_write() + def _fetch_info(self, items, write, force): + """Fetch additional information from AcousticBrainz for the `item`s. + """ + tags = self.config['tags'].as_str_seq() + for item in items: + # If we're not forcing re-downloading for all tracks, check + # whether the data is already present. 
We use one + # representative field name to check for previously fetched + # data. + if not force: + mood_str = item.get('mood_acoustic', u'') + if mood_str: + self._log.info(u'data already present for: {}', item) + continue + + # We can only fetch data for tracks with MBIDs. + if not item.mb_trackid: + continue + + self._log.info(u'getting data for: {}', item) + data = self._get_data(item.mb_trackid) + if data: + for attr, val in self._map_data_to_scheme(data, ABSCHEME): + if not tags or attr in tags: + self._log.debug(u'attribute {} of {} set to {}', + attr, + item, + val) + setattr(item, attr, val) + else: + self._log.debug(u'skipping attribute {} of {}' + u' (value {}) due to config', + attr, + item, + val) + item.store() + if write: + item.try_write() + + def _map_data_to_scheme(self, data, scheme): + """Given `data` as a structure of nested dictionaries, and `scheme` as a + structure of nested dictionaries , `yield` tuples `(attr, val)` where + `attr` and `val` are corresponding leaf nodes in `scheme` and `data`. + + As its name indicates, `scheme` defines how the data is structured, + so this function tries to find leaf nodes in `data` that correspond + to the leafs nodes of `scheme`, and not the other way around. + Leaf nodes of `data` that do not exist in the `scheme` do not matter. + If a leaf node of `scheme` is not present in `data`, + no value is yielded for that attribute and a simple warning is issued. + + Finally, to account for attributes of which the value is split between + several leaf nodes in `data`, leaf nodes of `scheme` can be tuples + `(attr, order)` where `attr` is the attribute to which the leaf node + belongs, and `order` is the place at which it should appear in the + value. The different `value`s belonging to the same `attr` are simply + joined with `' '`. This is hardcoded and not very flexible, but it gets + the job done. 
+ + For example: + + >>> scheme = { + 'key1': 'attribute', + 'key group': { + 'subkey1': 'subattribute', + 'subkey2': ('composite attribute', 0) + }, + 'key2': ('composite attribute', 1) + } + >>> data = { + 'key1': 'value', + 'key group': { + 'subkey1': 'subvalue', + 'subkey2': 'part 1 of composite attr' + }, + 'key2': 'part 2' + } + >>> print(list(_map_data_to_scheme(data, scheme))) + [('subattribute', 'subvalue'), + ('attribute', 'value'), + ('composite attribute', 'part 1 of composite attr part 2')] + """ + # First, we traverse `scheme` and `data`, `yield`ing all the non + # composites attributes straight away and populating the dictionary + # `composites` with the composite attributes. + + # When we are finished traversing `scheme`, `composites` should + # map each composite attribute to an ordered list of the values + # belonging to the attribute, for example: + # `composites = {'initial_key': ['B', 'minor']}`. + + # The recursive traversal. + composites = defaultdict(list) + for attr, val in self._data_to_scheme_child(data, + scheme, + composites): + yield attr, val + + # When composites has been populated, yield the composite attributes + # by joining their parts. + for composite_attr, value_parts in composites.items(): + yield composite_attr, ' '.join(value_parts) + + def _data_to_scheme_child(self, subdata, subscheme, composites): + """The recursive business logic of :meth:`_map_data_to_scheme`: + Traverse two structures of nested dictionaries in parallel and `yield` + tuples of corresponding leaf nodes. + + If a leaf node belongs to a composite attribute (is a `tuple`), + populate `composites` rather than yielding straight away. + All the child functions for a single traversal share the same + `composites` instance, which is passed along. 
+ """ + for k, v in subscheme.items(): + if k in subdata: + if type(v) == dict: + for attr, val in self._data_to_scheme_child(subdata[k], + v, + composites): + yield attr, val + elif type(v) == tuple: + composite_attribute, part_number = v + attribute_parts = composites[composite_attribute] + # Parts are not guaranteed to be inserted in order + while len(attribute_parts) <= part_number: + attribute_parts.append('') + attribute_parts[part_number] = subdata[k] + else: + yield v, subdata[k] + else: + self._log.warning(u'Acousticbrainz did not provide info' + u'about {}', k) + self._log.debug(u'Data {} could not be mapped to scheme {} ' + u'because key {} was not found', subdata, v, k) -def generate_url(mbid, level): - """Generates AcousticBrainz end point url for given MBID. +def _generate_urls(mbid): + """Generates AcousticBrainz end point urls for given `mbid`. """ - return ACOUSTIC_BASE + mbid + level + for level in LEVELS: + yield ACOUSTIC_BASE + mbid + level diff --git a/libs/beetsplug/badfiles.py b/libs/beetsplug/badfiles.py index f9704d48..62c6d8af 100644 --- a/libs/beetsplug/badfiles.py +++ b/libs/beetsplug/badfiles.py @@ -27,6 +27,24 @@ import shlex import os import errno import sys +import six + + +class CheckerCommandException(Exception): + """Raised when running a checker failed. + + Attributes: + checker: Checker command name. + path: Path to the file being validated. + errno: Error number from the checker execution error. + msg: Message from the checker execution error. 
+ """ + + def __init__(self, cmd, oserror): + self.checker = cmd[0] + self.path = cmd[-1] + self.errno = oserror.errno + self.msg = str(oserror) class BadFiles(BeetsPlugin): @@ -42,11 +60,7 @@ class BadFiles(BeetsPlugin): errors = 1 status = e.returncode except OSError as e: - if e.errno == errno.ENOENT: - ui.print_(u"command not found: {}".format(cmd[0])) - sys.exit(1) - else: - raise + raise CheckerCommandException(cmd, e) output = output.decode(sys.getfilesystemencoding()) return status, errors, [line for line in output.split("\n") if line] @@ -92,29 +106,47 @@ class BadFiles(BeetsPlugin): ui.colorize('text_error', dpath))) # Run the checker against the file if one is found - ext = os.path.splitext(item.path)[1][1:] + ext = os.path.splitext(item.path)[1][1:].decode('utf8', 'ignore') checker = self.get_checker(ext) if not checker: + self._log.error(u"no checker specified in the config for {}", + ext) continue path = item.path - if not isinstance(path, unicode): + if not isinstance(path, six.text_type): path = item.path.decode(sys.getfilesystemencoding()) - status, errors, output = checker(path) + try: + status, errors, output = checker(path) + except CheckerCommandException as e: + if e.errno == errno.ENOENT: + self._log.error( + u"command not found: {} when validating file: {}", + e.checker, + e.path + ) + else: + self._log.error(u"error invoking {}: {}", e.checker, e.msg) + continue if status > 0: - ui.print_(u"{}: checker exited withs status {}" + ui.print_(u"{}: checker exited with status {}" .format(ui.colorize('text_error', dpath), status)) for line in output: - ui.print_(" {}".format(displayable_path(line))) + ui.print_(u" {}".format(displayable_path(line))) elif errors > 0: ui.print_(u"{}: checker found {} errors or warnings" .format(ui.colorize('text_warning', dpath), errors)) for line in output: ui.print_(u" {}".format(displayable_path(line))) - else: + elif opts.verbose: ui.print_(u"{}: ok".format(ui.colorize('text_success', dpath))) def 
commands(self): bad_command = Subcommand('bad', help=u'check for corrupt or missing files') + bad_command.parser.add_option( + u'-v', u'--verbose', + action='store_true', default=False, dest='verbose', + help=u'view results for both the bad and uncorrupted files' + ) bad_command.func = self.check_bad return [bad_command] diff --git a/libs/beetsplug/beatport.py b/libs/beetsplug/beatport.py new file mode 100644 index 00000000..fc412d99 --- /dev/null +++ b/libs/beetsplug/beatport.py @@ -0,0 +1,461 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Adds Beatport release and track search support to the autotagger +""" +from __future__ import division, absolute_import, print_function + +import json +import re +import six +from datetime import datetime, timedelta + +from requests_oauthlib import OAuth1Session +from requests_oauthlib.oauth1_session import (TokenRequestDenied, TokenMissing, + VerifierMissing) + +import beets +import beets.ui +from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance +from beets.plugins import BeetsPlugin +from beets.util import confit + + +AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing) +USER_AGENT = u'beets/{0} +http://beets.io/'.format(beets.__version__) + + +class BeatportAPIError(Exception): + pass + + +class BeatportObject(object): + def __init__(self, data): + self.beatport_id = data['id'] + self.name = six.text_type(data['name']) + if 'releaseDate' in data: + self.release_date = datetime.strptime(data['releaseDate'], + '%Y-%m-%d') + if 'artists' in data: + self.artists = [(x['id'], six.text_type(x['name'])) + for x in data['artists']] + if 'genres' in data: + self.genres = [six.text_type(x['name']) + for x in data['genres']] + + +class BeatportClient(object): + _api_base = 'https://oauth-api.beatport.com' + + def __init__(self, c_key, c_secret, auth_key=None, auth_secret=None): + """ Initiate the client with OAuth information. + + For the initial authentication with the backend `auth_key` and + `auth_secret` can be `None`. Use `get_authorize_url` and + `get_access_token` to obtain them for subsequent uses of the API. 
+ + :param c_key: OAuth1 client key + :param c_secret: OAuth1 client secret + :param auth_key: OAuth1 resource owner key + :param auth_secret: OAuth1 resource owner secret + """ + self.api = OAuth1Session( + client_key=c_key, client_secret=c_secret, + resource_owner_key=auth_key, + resource_owner_secret=auth_secret, + callback_uri='oob') + self.api.headers = {'User-Agent': USER_AGENT} + + def get_authorize_url(self): + """ Generate the URL for the user to authorize the application. + + Retrieves a request token from the Beatport API and returns the + corresponding authorization URL on their end that the user has + to visit. + + This is the first step of the initial authorization process with the + API. Once the user has visited the URL, call + :py:method:`get_access_token` with the displayed data to complete + the process. + + :returns: Authorization URL for the user to visit + :rtype: unicode + """ + self.api.fetch_request_token( + self._make_url('/identity/1/oauth/request-token')) + return self.api.authorization_url( + self._make_url('/identity/1/oauth/authorize')) + + def get_access_token(self, auth_data): + """ Obtain the final access token and secret for the API. + + :param auth_data: URL-encoded authorization data as displayed at + the authorization url (obtained via + :py:meth:`get_authorize_url`) after signing in + :type auth_data: unicode + :returns: OAuth resource owner key and secret + :rtype: (unicode, unicode) tuple + """ + self.api.parse_authorization_response( + "http://beets.io/auth?" + auth_data) + access_data = self.api.fetch_access_token( + self._make_url('/identity/1/oauth/access-token')) + return access_data['oauth_token'], access_data['oauth_token_secret'] + + def search(self, query, release_type='release', details=True): + """ Perform a search of the Beatport catalogue. 
+ + :param query: Query string + :param release_type: Type of releases to search for, can be + 'release' or 'track' + :param details: Retrieve additional information about the + search results. Currently this will fetch + the tracklist for releases and do nothing for + tracks + :returns: Search results + :rtype: generator that yields + py:class:`BeatportRelease` or + :py:class:`BeatportTrack` + """ + response = self._get('catalog/3/search', + query=query, perPage=5, + facets=['fieldType:{0}'.format(release_type)]) + for item in response: + if release_type == 'release': + if details: + release = self.get_release(item['id']) + else: + release = BeatportRelease(item) + yield release + elif release_type == 'track': + yield BeatportTrack(item) + + def get_release(self, beatport_id): + """ Get information about a single release. + + :param beatport_id: Beatport ID of the release + :returns: The matching release + :rtype: :py:class:`BeatportRelease` + """ + response = self._get('/catalog/3/releases', id=beatport_id) + release = BeatportRelease(response[0]) + release.tracks = self.get_release_tracks(beatport_id) + return release + + def get_release_tracks(self, beatport_id): + """ Get all tracks for a given release. + + :param beatport_id: Beatport ID of the release + :returns: Tracks in the matching release + :rtype: list of :py:class:`BeatportTrack` + """ + response = self._get('/catalog/3/tracks', releaseId=beatport_id, + perPage=100) + return [BeatportTrack(t) for t in response] + + def get_track(self, beatport_id): + """ Get information about a single track. + + :param beatport_id: Beatport ID of the track + :returns: The matching track + :rtype: :py:class:`BeatportTrack` + """ + response = self._get('/catalog/3/tracks', id=beatport_id) + return BeatportTrack(response[0]) + + def _make_url(self, endpoint): + """ Get complete URL for a given API endpoint. 
""" + if not endpoint.startswith('/'): + endpoint = '/' + endpoint + return self._api_base + endpoint + + def _get(self, endpoint, **kwargs): + """ Perform a GET request on a given API endpoint. + + Automatically extracts result data from the response and converts HTTP + exceptions into :py:class:`BeatportAPIError` objects. + """ + try: + response = self.api.get(self._make_url(endpoint), params=kwargs) + except Exception as e: + raise BeatportAPIError("Error connecting to Beatport API: {}" + .format(e.message)) + if not response: + raise BeatportAPIError( + "Error {0.status_code} for '{0.request.path_url}" + .format(response)) + return response.json()['results'] + + +@six.python_2_unicode_compatible +class BeatportRelease(BeatportObject): + def __str__(self): + if len(self.artists) < 4: + artist_str = ", ".join(x[1] for x in self.artists) + else: + artist_str = "Various Artists" + return u"".format( + artist_str, + self.name, + self.catalog_number, + ) + + def __repr__(self): + return six.text_type(self).encode('utf-8') + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'catalogNumber' in data: + self.catalog_number = data['catalogNumber'] + if 'label' in data: + self.label_name = data['label']['name'] + if 'category' in data: + self.category = data['category'] + if 'slug' in data: + self.url = "http://beatport.com/release/{0}/{1}".format( + data['slug'], data['id']) + + +@six.python_2_unicode_compatible +class BeatportTrack(BeatportObject): + def __str__(self): + artist_str = ", ".join(x[1] for x in self.artists) + return (u"" + .format(artist_str, self.name, self.mix_name)) + + def __repr__(self): + return six.text_type(self).encode('utf-8') + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'title' in data: + self.title = six.text_type(data['title']) + if 'mixName' in data: + self.mix_name = six.text_type(data['mixName']) + self.length = timedelta(milliseconds=data.get('lengthMs', 0) or 0) + if not self.length: + try: 
+ min, sec = data.get('length', '0:0').split(':') + self.length = timedelta(minutes=int(min), seconds=int(sec)) + except ValueError: + pass + if 'slug' in data: + self.url = "http://beatport.com/track/{0}/{1}".format(data['slug'], + data['id']) + self.track_number = data.get('trackNumber') + + +class BeatportPlugin(BeetsPlugin): + def __init__(self): + super(BeatportPlugin, self).__init__() + self.config.add({ + 'apikey': '57713c3906af6f5def151b33601389176b37b429', + 'apisecret': 'b3fe08c93c80aefd749fe871a16cd2bb32e2b954', + 'tokenfile': 'beatport_token.json', + 'source_weight': 0.5, + }) + self.config['apikey'].redact = True + self.config['apisecret'].redact = True + self.client = None + self.register_listener('import_begin', self.setup) + + def setup(self, session=None): + c_key = self.config['apikey'].as_str() + c_secret = self.config['apisecret'].as_str() + + # Get the OAuth token from a file or log in. + try: + with open(self._tokenfile()) as f: + tokendata = json.load(f) + except IOError: + # No token yet. Generate one. + token, secret = self.authenticate(c_key, c_secret) + else: + token = tokendata['token'] + secret = tokendata['secret'] + + self.client = BeatportClient(c_key, c_secret, token, secret) + + def authenticate(self, c_key, c_secret): + # Get the link for the OAuth page. + auth_client = BeatportClient(c_key, c_secret) + try: + url = auth_client.get_authorize_url() + except AUTH_ERRORS as e: + self._log.debug(u'authentication error: {0}', e) + raise beets.ui.UserError(u'communication with Beatport failed') + + beets.ui.print_(u"To authenticate with Beatport, visit:") + beets.ui.print_(url) + + # Ask for the verifier data and validate it. + data = beets.ui.input_(u"Enter the string displayed in your browser:") + try: + token, secret = auth_client.get_access_token(data) + except AUTH_ERRORS as e: + self._log.debug(u'authentication error: {0}', e) + raise beets.ui.UserError(u'Beatport token request failed') + + # Save the token for later use. 
+ self._log.debug(u'Beatport token {0}, secret {1}', token, secret) + with open(self._tokenfile(), 'w') as f: + json.dump({'token': token, 'secret': secret}, f) + + return token, secret + + def _tokenfile(self): + """Get the path to the JSON file for storing the OAuth token. + """ + return self.config['tokenfile'].get(confit.Filename(in_app_dir=True)) + + def album_distance(self, items, album_info, mapping): + """Returns the beatport source weight and the maximum source weight + for albums. + """ + dist = Distance() + if album_info.data_source == 'Beatport': + dist.add('source', self.config['source_weight'].as_number()) + return dist + + def track_distance(self, item, track_info): + """Returns the beatport source weight and the maximum source weight + for individual tracks. + """ + dist = Distance() + if track_info.data_source == 'Beatport': + dist.add('source', self.config['source_weight'].as_number()) + return dist + + def candidates(self, items, artist, release, va_likely): + """Returns a list of AlbumInfo objects for beatport search results + matching release and artist (if not various). + """ + if va_likely: + query = release + else: + query = '%s %s' % (artist, release) + try: + return self._get_releases(query) + except BeatportAPIError as e: + self._log.debug(u'API Error: {0} (query: {1})', e, query) + return [] + + def item_candidates(self, item, artist, title): + """Returns a list of TrackInfo objects for beatport search results + matching title and artist. + """ + query = '%s %s' % (artist, title) + try: + return self._get_tracks(query) + except BeatportAPIError as e: + self._log.debug(u'API Error: {0} (query: {1})', e, query) + return [] + + def album_for_id(self, release_id): + """Fetches a release by its Beatport ID and returns an AlbumInfo object + or None if the release is not found. 
+ """ + self._log.debug(u'Searching for release {0}', release_id) + match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id) + if not match: + return None + release = self.client.get_release(match.group(2)) + album = self._get_album_info(release) + return album + + def track_for_id(self, track_id): + """Fetches a track by its Beatport ID and returns a TrackInfo object + or None if the track is not found. + """ + self._log.debug(u'Searching for track {0}', track_id) + match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id) + if not match: + return None + bp_track = self.client.get_track(match.group(2)) + track = self._get_track_info(bp_track) + return track + + def _get_releases(self, query): + """Returns a list of AlbumInfo objects for a beatport search query. + """ + # Strip non-word characters from query. Things like "!" and "-" can + # cause a query to return no results, even if they match the artist or + # album title. Use `re.UNICODE` flag to avoid stripping non-english + # word characters. + query = re.sub(r'\W+', ' ', query, flags=re.UNICODE) + # Strip medium information from query, Things like "CD1" and "disk 1" + # can also negate an otherwise positive result. + query = re.sub(r'\b(CD|disc)\s*\d+', '', query, flags=re.I) + albums = [self._get_album_info(x) + for x in self.client.search(query)] + return albums + + def _get_album_info(self, release): + """Returns an AlbumInfo object for a Beatport Release object. 
+ """ + va = len(release.artists) > 3 + artist, artist_id = self._get_artist(release.artists) + if va: + artist = u"Various Artists" + tracks = [self._get_track_info(x) for x in release.tracks] + + return AlbumInfo(album=release.name, album_id=release.beatport_id, + artist=artist, artist_id=artist_id, tracks=tracks, + albumtype=release.category, va=va, + year=release.release_date.year, + month=release.release_date.month, + day=release.release_date.day, + label=release.label_name, + catalognum=release.catalog_number, media=u'Digital', + data_source=u'Beatport', data_url=release.url) + + def _get_track_info(self, track): + """Returns a TrackInfo object for a Beatport Track object. + """ + title = track.name + if track.mix_name != u"Original Mix": + title += u" ({0})".format(track.mix_name) + artist, artist_id = self._get_artist(track.artists) + length = track.length.total_seconds() + return TrackInfo(title=title, track_id=track.beatport_id, + artist=artist, artist_id=artist_id, + length=length, index=track.track_number, + medium_index=track.track_number, + data_source=u'Beatport', data_url=track.url) + + def _get_artist(self, artists): + """Returns an artist string (all artists) and an artist_id (the main + artist) for a list of Beatport release or track artists. + """ + artist_id = None + bits = [] + for artist in artists: + if not artist_id: + artist_id = artist[0] + name = artist[1] + # Strip disambiguation number. + name = re.sub(r' \(\d+\)$', '', name) + # Move articles to the front. + name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I) + bits.append(name) + artist = ', '.join(bits).replace(' ,', ',') or None + return artist, artist_id + + def _get_tracks(self, query): + """Returns a list of TrackInfo objects for a Beatport query. 
+ """ + bp_tracks = self.client.search(query, release_type='track') + tracks = [self._get_track_info(x) for x in bp_tracks] + return tracks diff --git a/libs/beetsplug/bpd/__init__.py b/libs/beetsplug/bpd/__init__.py index 33deda02..1049f0c7 100644 --- a/libs/beetsplug/bpd/__init__.py +++ b/libs/beetsplug/bpd/__init__.py @@ -35,17 +35,18 @@ from beets.util import bluelet from beets.library import Item from beets import dbcore from beets.mediafile import MediaFile +import six PROTOCOL_VERSION = '0.13.0' BUFSIZE = 1024 -HELLO = 'OK MPD %s' % PROTOCOL_VERSION -CLIST_BEGIN = 'command_list_begin' -CLIST_VERBOSE_BEGIN = 'command_list_ok_begin' -CLIST_END = 'command_list_end' -RESP_OK = 'OK' -RESP_CLIST_VERBOSE = 'list_OK' -RESP_ERR = 'ACK' +HELLO = u'OK MPD %s' % PROTOCOL_VERSION +CLIST_BEGIN = u'command_list_begin' +CLIST_VERBOSE_BEGIN = u'command_list_ok_begin' +CLIST_END = u'command_list_end' +RESP_OK = u'OK' +RESP_CLIST_VERBOSE = u'list_OK' +RESP_ERR = u'ACK' NEWLINE = u"\n" @@ -305,12 +306,12 @@ class BaseServer(object): playlist, playlistlength, and xfade. 
""" yield ( - u'volume: ' + unicode(self.volume), - u'repeat: ' + unicode(int(self.repeat)), - u'random: ' + unicode(int(self.random)), - u'playlist: ' + unicode(self.playlist_version), - u'playlistlength: ' + unicode(len(self.playlist)), - u'xfade: ' + unicode(self.crossfade), + u'volume: ' + six.text_type(self.volume), + u'repeat: ' + six.text_type(int(self.repeat)), + u'random: ' + six.text_type(int(self.random)), + u'playlist: ' + six.text_type(self.playlist_version), + u'playlistlength: ' + six.text_type(len(self.playlist)), + u'xfade: ' + six.text_type(self.crossfade), ) if self.current_index == -1: @@ -323,8 +324,8 @@ class BaseServer(object): if self.current_index != -1: # i.e., paused or playing current_id = self._item_id(self.playlist[self.current_index]) - yield u'song: ' + unicode(self.current_index) - yield u'songid: ' + unicode(current_id) + yield u'song: ' + six.text_type(self.current_index) + yield u'songid: ' + six.text_type(current_id) if self.error: yield u'error: ' + self.error @@ -468,8 +469,8 @@ class BaseServer(object): Also a dummy implementation. """ for idx, track in enumerate(self.playlist): - yield u'cpos: ' + unicode(idx) - yield u'Id: ' + unicode(track.id) + yield u'cpos: ' + six.text_type(idx) + yield u'Id: ' + six.text_type(track.id) def cmd_currentsong(self, conn): """Sends information about the currently-playing song. @@ -569,12 +570,12 @@ class Connection(object): added after every string. Returns a Bluelet event that sends the data. """ - if isinstance(lines, basestring): + if isinstance(lines, six.string_types): lines = [lines] out = NEWLINE.join(lines) + NEWLINE log.debug('{}', out[:-1]) # Don't log trailing newline. 
- if isinstance(out, unicode): - out = out.encode('utf8') + if isinstance(out, six.text_type): + out = out.encode('utf-8') return self.sock.sendall(out) def do_command(self, command): @@ -603,7 +604,8 @@ class Connection(object): line = line.strip() if not line: break - log.debug('{}', line) + line = line.decode('utf8') # MPD protocol uses UTF-8. + log.debug(u'{}', line) if clist is not None: # Command list already opened. @@ -639,8 +641,8 @@ class Command(object): """A command issued by the client for processing by the server. """ - command_re = re.compile(br'^([^ \t]+)[ \t]*') - arg_re = re.compile(br'"((?:\\"|[^"])+)"|([^ \t"]+)') + command_re = re.compile(r'^([^ \t]+)[ \t]*') + arg_re = re.compile(r'"((?:\\"|[^"])+)"|([^ \t"]+)') def __init__(self, s): """Creates a new `Command` from the given string, `s`, parsing @@ -655,11 +657,10 @@ class Command(object): if match[0]: # Quoted argument. arg = match[0] - arg = arg.replace(b'\\"', b'"').replace(b'\\\\', b'\\') + arg = arg.replace(u'\\"', u'"').replace(u'\\\\', u'\\') else: # Unquoted argument. 
arg = match[1] - arg = arg.decode('utf8') self.args.append(arg) def run(self, conn): @@ -771,28 +772,28 @@ class Server(BaseServer): def _item_info(self, item): info_lines = [ u'file: ' + item.destination(fragment=True), - u'Time: ' + unicode(int(item.length)), + u'Time: ' + six.text_type(int(item.length)), u'Title: ' + item.title, u'Artist: ' + item.artist, u'Album: ' + item.album, u'Genre: ' + item.genre, ] - track = unicode(item.track) + track = six.text_type(item.track) if item.tracktotal: - track += u'/' + unicode(item.tracktotal) + track += u'/' + six.text_type(item.tracktotal) info_lines.append(u'Track: ' + track) - info_lines.append(u'Date: ' + unicode(item.year)) + info_lines.append(u'Date: ' + six.text_type(item.year)) try: pos = self._id_to_index(item.id) - info_lines.append(u'Pos: ' + unicode(pos)) + info_lines.append(u'Pos: ' + six.text_type(pos)) except ArgumentNotFoundError: # Don't include position if not in playlist. pass - info_lines.append(u'Id: ' + unicode(item.id)) + info_lines.append(u'Id: ' + six.text_type(item.id)) return info_lines @@ -852,7 +853,7 @@ class Server(BaseServer): for name, itemid in iter(sorted(node.files.items())): item = self.lib.get_item(itemid) yield self._item_info(item) - for name, _ in iter(sorted(node.dirs.iteritems())): + for name, _ in iter(sorted(node.dirs.items())): dirpath = self._path_join(path, name) if dirpath.startswith(u"/"): # Strip leading slash (libmpc rejects this). @@ -872,12 +873,12 @@ class Server(BaseServer): yield u'file: ' + basepath else: # List a directory. Recurse into both directories and files. 
- for name, itemid in sorted(node.files.iteritems()): + for name, itemid in sorted(node.files.items()): newpath = self._path_join(basepath, name) # "yield from" for v in self._listall(newpath, itemid, info): yield v - for name, subdir in sorted(node.dirs.iteritems()): + for name, subdir in sorted(node.dirs.items()): newpath = self._path_join(basepath, name) yield u'directory: ' + newpath for v in self._listall(newpath, subdir, info): @@ -902,11 +903,11 @@ class Server(BaseServer): yield self.lib.get_item(node) else: # Recurse into a directory. - for name, itemid in sorted(node.files.iteritems()): + for name, itemid in sorted(node.files.items()): # "yield from" for v in self._all_items(itemid): yield v - for name, subdir in sorted(node.dirs.iteritems()): + for name, subdir in sorted(node.dirs.items()): for v in self._all_items(subdir): yield v @@ -917,7 +918,7 @@ class Server(BaseServer): for item in self._all_items(self._resolve_path(path)): self.playlist.append(item) if send_id: - yield u'Id: ' + unicode(item.id) + yield u'Id: ' + six.text_type(item.id) self.playlist_version += 1 def cmd_add(self, conn, path): @@ -938,11 +939,11 @@ class Server(BaseServer): if self.current_index > -1: item = self.playlist[self.current_index] - yield u'bitrate: ' + unicode(item.bitrate / 1000) + yield u'bitrate: ' + six.text_type(item.bitrate / 1000) # Missing 'audio'. (pos, total) = self.player.time() - yield u'time: ' + unicode(pos) + u':' + unicode(total) + yield u'time: ' + six.text_type(pos) + u':' + six.text_type(total) # Also missing 'updating_db'. 
@@ -957,13 +958,13 @@ class Server(BaseServer): artists, albums, songs, totaltime = tx.query(statement)[0] yield ( - u'artists: ' + unicode(artists), - u'albums: ' + unicode(albums), - u'songs: ' + unicode(songs), - u'uptime: ' + unicode(int(time.time() - self.startup_time)), + u'artists: ' + six.text_type(artists), + u'albums: ' + six.text_type(albums), + u'songs: ' + six.text_type(songs), + u'uptime: ' + six.text_type(int(time.time() - self.startup_time)), u'playtime: ' + u'0', # Missing. - u'db_playtime: ' + unicode(int(totaltime)), - u'db_update: ' + unicode(int(self.updated_time)), + u'db_playtime: ' + six.text_type(int(totaltime)), + u'db_update: ' + six.text_type(int(self.updated_time)), ) # Searching. @@ -1059,7 +1060,7 @@ class Server(BaseServer): rows = tx.query(statement, subvals) for row in rows: - yield show_tag_canon + u': ' + unicode(row[0]) + yield show_tag_canon + u': ' + six.text_type(row[0]) def cmd_count(self, conn, tag, value): """Returns the number and total time of songs matching the @@ -1071,8 +1072,8 @@ class Server(BaseServer): for item in self.lib.items(dbcore.query.MatchQuery(key, value)): songs += 1 playtime += item.length - yield u'songs: ' + unicode(songs) - yield u'playtime: ' + unicode(int(playtime)) + yield u'songs: ' + six.text_type(songs) + yield u'playtime: ' + six.text_type(int(playtime)) # "Outputs." Just a dummy implementation because we don't control # any outputs. 
@@ -1167,7 +1168,7 @@ class BPDPlugin(BeetsPlugin): server.run() except NoGstreamerError: global_log.error(u'Gstreamer Python bindings not found.') - global_log.error(u'Install "python-gst0.10", "py27-gst-python", ' + global_log.error(u'Install "gstreamer1.0" and "python-gi"' u'or similar package to use BPD.') def commands(self): @@ -1180,11 +1181,12 @@ class BPDPlugin(BeetsPlugin): ) def func(lib, opts, args): - host = args.pop(0) if args else self.config['host'].get(unicode) + host = self.config['host'].as_str() + host = args.pop(0) if args else host port = args.pop(0) if args else self.config['port'].get(int) if args: raise beets.ui.UserError(u'too many arguments') - password = self.config['password'].get(unicode) + password = self.config['password'].as_str() volume = self.config['volume'].get(int) debug = opts.debug or False self.start_bpd(lib, host, int(port), password, volume, debug) diff --git a/libs/beetsplug/bpd/gstplayer.py b/libs/beetsplug/bpd/gstplayer.py index b64cd009..705692aa 100644 --- a/libs/beetsplug/bpd/gstplayer.py +++ b/libs/beetsplug/bpd/gstplayer.py @@ -19,17 +19,25 @@ music player. from __future__ import division, absolute_import, print_function +import six import sys import time -import gobject -import thread +from six.moves import _thread import os import copy -import urllib +from six.moves import urllib +from beets import ui -import pygst -pygst.require('0.10') -import gst # noqa +import gi +gi.require_version('Gst', '1.0') +from gi.repository import GLib, Gst # noqa: E402 + + +Gst.init(None) + + +class QueryError(Exception): + pass class GstPlayer(object): @@ -57,8 +65,19 @@ class GstPlayer(object): # Set up the Gstreamer player. 
From the pygst tutorial: # http://pygstdocs.berlios.de/pygst-tutorial/playbin.html - self.player = gst.element_factory_make("playbin2", "player") - fakesink = gst.element_factory_make("fakesink", "fakesink") + #### + # Updated to GStreamer 1.0 with: + # https://wiki.ubuntu.com/Novacut/GStreamer1.0 + self.player = Gst.ElementFactory.make("playbin", "player") + + if self.player is None: + raise ui.UserError("Could not create playbin") + + fakesink = Gst.ElementFactory.make("fakesink", "fakesink") + + if fakesink is None: + raise ui.UserError("Could not create fakesink") + self.player.set_property("video-sink", fakesink) bus = self.player.get_bus() bus.add_signal_watch() @@ -74,21 +93,21 @@ class GstPlayer(object): """Returns the current state flag of the playbin.""" # gst's get_state function returns a 3-tuple; we just want the # status flag in position 1. - return self.player.get_state()[1] + return self.player.get_state(Gst.CLOCK_TIME_NONE)[1] def _handle_message(self, bus, message): """Callback for status updates from GStreamer.""" - if message.type == gst.MESSAGE_EOS: + if message.type == Gst.MessageType.EOS: # file finished playing - self.player.set_state(gst.STATE_NULL) + self.player.set_state(Gst.State.NULL) self.playing = False self.cached_time = None if self.finished_callback: self.finished_callback() - elif message.type == gst.MESSAGE_ERROR: + elif message.type == Gst.MessageType.ERROR: # error - self.player.set_state(gst.STATE_NULL) + self.player.set_state(Gst.State.NULL) err, debug = message.parse_error() print(u"Error: {0}".format(err)) self.playing = False @@ -109,27 +128,27 @@ class GstPlayer(object): """Immediately begin playing the audio file at the given path. 
""" - self.player.set_state(gst.STATE_NULL) - if isinstance(path, unicode): - path = path.encode('utf8') - uri = 'file://' + urllib.quote(path) + self.player.set_state(Gst.State.NULL) + if isinstance(path, six.text_type): + path = path.encode('utf-8') + uri = 'file://' + urllib.parse.quote(path) self.player.set_property("uri", uri) - self.player.set_state(gst.STATE_PLAYING) + self.player.set_state(Gst.State.PLAYING) self.playing = True def play(self): """If paused, resume playback.""" - if self._get_state() == gst.STATE_PAUSED: - self.player.set_state(gst.STATE_PLAYING) + if self._get_state() == Gst.State.PAUSED: + self.player.set_state(Gst.State.PLAYING) self.playing = True def pause(self): """Pause playback.""" - self.player.set_state(gst.STATE_PAUSED) + self.player.set_state(Gst.State.PAUSED) def stop(self): """Halt playback.""" - self.player.set_state(gst.STATE_NULL) + self.player.set_state(Gst.State.NULL) self.playing = False self.cached_time = None @@ -139,27 +158,36 @@ class GstPlayer(object): Call this function before trying to play any music with play_file() or play(). """ + # If we don't use the MainLoop, messages are never sent. - gobject.threads_init() def start(): - loop = gobject.MainLoop() + loop = GLib.MainLoop() loop.run() - thread.start_new_thread(start, ()) + + _thread.start_new_thread(start, ()) def time(self): """Returns a tuple containing (position, length) where both values are integers in seconds. If no stream is available, returns (0, 0). 
""" - fmt = gst.Format(gst.FORMAT_TIME) + fmt = Gst.Format(Gst.Format.TIME) try: - pos = self.player.query_position(fmt, None)[0] / (10 ** 9) - length = self.player.query_duration(fmt, None)[0] / (10 ** 9) + posq = self.player.query_position(fmt) + if not posq[0]: + raise QueryError("query_position failed") + pos = posq[1] // (10 ** 9) + + lengthq = self.player.query_duration(fmt) + if not lengthq[0]: + raise QueryError("query_duration failed") + length = lengthq[1] // (10 ** 9) + self.cached_time = (pos, length) return (pos, length) - except gst.QueryError: + except QueryError: # Stream not ready. For small gaps of time, for instance # after seeking, the time values are unavailable. For this # reason, we cache recent. @@ -175,9 +203,9 @@ class GstPlayer(object): self.stop() return - fmt = gst.Format(gst.FORMAT_TIME) + fmt = Gst.Format(Gst.Format.TIME) ns = position * 10 ** 9 # convert to nanoseconds - self.player.seek_simple(fmt, gst.SEEK_FLAG_FLUSH, ns) + self.player.seek_simple(fmt, Gst.SeekFlags.FLUSH, ns) # save new cached time self.cached_time = (position, cur_len) @@ -208,12 +236,14 @@ def play_complicated(paths): def next_song(): my_paths.pop(0) p.play_file(my_paths[0]) + p = GstPlayer(next_song) p.run() p.play_file(my_paths[0]) while my_paths: time.sleep(1) + if __name__ == '__main__': # A very simple command-line player. Just give it names of audio # files on the command line; these are all played in sequence. diff --git a/libs/beetsplug/bpm.py b/libs/beetsplug/bpm.py index ba284c04..20218bd3 100644 --- a/libs/beetsplug/bpm.py +++ b/libs/beetsplug/bpm.py @@ -18,6 +18,7 @@ from __future__ import division, absolute_import, print_function import time +from six.moves import input from beets import ui from beets.plugins import BeetsPlugin @@ -31,7 +32,7 @@ def bpm(max_strokes): dt = [] for i in range(max_strokes): # Press enter to the rhythm... 
- s = raw_input() + s = input() if s == '': t1 = time.time() # Only start measuring at the second stroke @@ -64,7 +65,9 @@ class BPMPlugin(BeetsPlugin): return [cmd] def command(self, lib, opts, args): - self.get_bpm(lib.items(ui.decargs(args))) + items = lib.items(ui.decargs(args)) + write = ui.should_write() + self.get_bpm(items, write) def get_bpm(self, items, write=False): overwrite = self.config['overwrite'].get(bool) diff --git a/libs/beetsplug/bucket.py b/libs/beetsplug/bucket.py index 21acb1f1..c4be2a3d 100644 --- a/libs/beetsplug/bucket.py +++ b/libs/beetsplug/bucket.py @@ -21,7 +21,8 @@ from __future__ import division, absolute_import, print_function from datetime import datetime import re import string -from itertools import tee, izip +from six.moves import zip +from itertools import tee from beets import plugins, ui @@ -37,7 +38,7 @@ def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = tee(iterable) next(b, None) - return izip(a, b) + return zip(a, b) def span_from_str(span_str): @@ -137,9 +138,10 @@ def str2fmt(s): def format_span(fmt, yearfrom, yearto, fromnchars, tonchars): """Return a span string representation. 
""" - args = (bytes(yearfrom)[-fromnchars:]) + args = (str(yearfrom)[-fromnchars:]) if tonchars: - args = (bytes(yearfrom)[-fromnchars:], bytes(yearto)[-tonchars:]) + args = (str(yearfrom)[-fromnchars:], str(yearto)[-tonchars:]) + return fmt % args diff --git a/libs/beetsplug/chroma.py b/libs/beetsplug/chroma.py index 148e9c20..57472956 100644 --- a/libs/beetsplug/chroma.py +++ b/libs/beetsplug/chroma.py @@ -121,7 +121,7 @@ def _all_releases(items): for release_id in release_ids: relcounts[release_id] += 1 - for release_id, count in relcounts.iteritems(): + for release_id, count in relcounts.items(): if float(count) / len(items) > COMMON_REL_THRESH: yield release_id @@ -181,7 +181,7 @@ class AcoustidPlugin(plugins.BeetsPlugin): def submit_cmd_func(lib, opts, args): try: - apikey = config['acoustid']['apikey'].get(unicode) + apikey = config['acoustid']['apikey'].as_str() except confit.NotFoundError: raise ui.UserError(u'no Acoustid user API key provided') submit_items(self._log, apikey, lib.items(ui.decargs(args))) @@ -236,7 +236,7 @@ def submit_items(log, userkey, items, chunksize=64): try: acoustid.submit(API_KEY, userkey, data) except acoustid.AcoustidError as exc: - log.warn(u'acoustid submission error: {0}', exc) + log.warning(u'acoustid submission error: {0}', exc) del data[:] for item in items: @@ -295,7 +295,7 @@ def fingerprint_item(log, item, write=False): log.info(u'{0}: fingerprinting', util.displayable_path(item.path)) try: - _, fp = acoustid.fingerprint_file(item.path) + _, fp = acoustid.fingerprint_file(util.syspath(item.path)) item.acoustid_fingerprint = fp if write: log.info(u'{0}: writing fingerprint', diff --git a/libs/beetsplug/convert.py b/libs/beetsplug/convert.py index de91604f..d1223596 100644 --- a/libs/beetsplug/convert.py +++ b/libs/beetsplug/convert.py @@ -22,13 +22,17 @@ import threading import subprocess import tempfile import shlex +import six from string import Template +import platform from beets import ui, util, plugins, config from 
beets.plugins import BeetsPlugin from beets.util.confit import ConfigTypeError from beets import art from beets.util.artresizer import ArtResizer +from beets.library import parse_query_string +from beets.library import Item _fs_lock = threading.Lock() _temp_files = [] # Keep track of temporary transcoded files for deletion. @@ -47,14 +51,15 @@ def replace_ext(path, ext): The new extension must not contain a leading dot. """ - return os.path.splitext(path)[0] + b'.' + ext + ext_dot = b'.' + ext + return os.path.splitext(path)[0] + ext_dot def get_format(fmt=None): """Return the command template and the extension from the config. """ if not fmt: - fmt = config['convert']['format'].get(unicode).lower() + fmt = config['convert']['format'].as_str().lower() fmt = ALIASES.get(fmt, fmt) try: @@ -67,28 +72,34 @@ def get_format(fmt=None): .format(fmt) ) except ConfigTypeError: - command = config['convert']['formats'][fmt].get(bytes) + command = config['convert']['formats'][fmt].get(str) extension = fmt # Convenience and backwards-compatibility shortcuts. keys = config['convert'].keys() if 'command' in keys: - command = config['convert']['command'].get(unicode) + command = config['convert']['command'].as_str() elif 'opts' in keys: # Undocumented option for backwards compatibility with < 1.3.1. command = u'ffmpeg -i $source -y {0} $dest'.format( - config['convert']['opts'].get(unicode) + config['convert']['opts'].as_str() ) if 'extension' in keys: - extension = config['convert']['extension'].get(unicode) + extension = config['convert']['extension'].as_str() - return (command.encode('utf8'), extension.encode('utf8')) + return (command.encode('utf-8'), extension.encode('utf-8')) def should_transcode(item, fmt): """Determine whether the item should be transcoded as part of conversion (i.e., its bitrate is high or it has the wrong format). 
""" + no_convert_queries = config['convert']['no_convert'].as_str_seq() + if no_convert_queries: + for query_string in no_convert_queries: + query, _ = parse_query_string(query_string, Item) + if query.match(item): + return False if config['convert']['never_convert_lossy_files'] and \ not (item.format.lower() in LOSSLESS_FORMATS): return False @@ -107,8 +118,8 @@ class ConvertPlugin(BeetsPlugin): u'format': u'mp3', u'formats': { u'aac': { - u'command': u'ffmpeg -i $source -y -vn -acodec libfaac ' - u'-aq 100 $dest', + u'command': u'ffmpeg -i $source -y -vn -acodec aac ' + u'-aq 1 $dest', u'extension': u'm4a', }, u'alac': { @@ -130,11 +141,12 @@ class ConvertPlugin(BeetsPlugin): u'quiet': False, u'embed': True, u'paths': {}, + u'no_convert': u'', u'never_convert_lossy_files': False, u'copy_album_art': False, u'album_art_maxwidth': 0, }) - self.import_stages = [self.auto_convert] + self.early_import_stages = [self.auto_convert] self.register_listener('import_task_files', self._cleanup) @@ -181,27 +193,48 @@ class ConvertPlugin(BeetsPlugin): if not quiet and not pretend: self._log.info(u'Encoding {0}', util.displayable_path(source)) + # On Python 3, we need to construct the command to invoke as a + # Unicode string. On Unix, this is a little unfortunate---the OS is + # expecting bytes---so we use surrogate escaping and decode with the + # argument encoding, which is the same encoding that will then be + # *reversed* to recover the same bytes before invoking the OS. On + # Windows, we want to preserve the Unicode filename "as is." + if not six.PY2: + command = command.decode(util.arg_encoding(), 'surrogateescape') + if platform.system() == 'Windows': + source = source.decode(util._fsencoding()) + dest = dest.decode(util._fsencoding()) + else: + source = source.decode(util.arg_encoding(), 'surrogateescape') + dest = dest.decode(util.arg_encoding(), 'surrogateescape') + # Substitute $source and $dest in the argument list. 
args = shlex.split(command) + encode_cmd = [] for i, arg in enumerate(args): args[i] = Template(arg).safe_substitute({ 'source': source, 'dest': dest, }) + if six.PY2: + encode_cmd.append(args[i]) + else: + encode_cmd.append(args[i].encode(util.arg_encoding())) if pretend: - self._log.info(u' '.join(ui.decargs(args))) + self._log.info(u'{0}', u' '.join(ui.decargs(args))) return try: - util.command_output(args) + util.command_output(encode_cmd) except subprocess.CalledProcessError as exc: # Something went wrong (probably Ctrl+C), remove temporary files self._log.info(u'Encoding {0} failed. Cleaning up...', util.displayable_path(source)) - self._log.debug(u'Command {0} exited with status {1}', - exc.cmd.decode('utf8', 'ignore'), - exc.returncode) + self._log.debug(u'Command {0} exited with status {1}: {2}', + args, + exc.returncode, + exc.output) util.remove(dest) util.prune_dirs(os.path.dirname(dest)) raise @@ -218,6 +251,9 @@ class ConvertPlugin(BeetsPlugin): def convert_item(self, dest_dir, keep_new, path_formats, fmt, pretend=False): + """A pipeline thread that converts `Item` objects from a + library. 
+ """ command, ext = get_format(fmt) item, original, converted = None, None, None while True: @@ -369,61 +405,66 @@ class ConvertPlugin(BeetsPlugin): util.copy(album.artpath, dest) def convert_func(self, lib, opts, args): - if not opts.dest: - opts.dest = self.config['dest'].get() - if not opts.dest: + dest = opts.dest or self.config['dest'].get() + if not dest: raise ui.UserError(u'no convert destination set') - opts.dest = util.bytestring_path(opts.dest) + dest = util.bytestring_path(dest) - if not opts.threads: - opts.threads = self.config['threads'].get(int) + threads = opts.threads or self.config['threads'].get(int) - if self.config['paths']: - path_formats = ui.get_path_formats(self.config['paths']) + path_formats = ui.get_path_formats(self.config['paths'] or None) + + fmt = opts.format or self.config['format'].as_str().lower() + + if opts.pretend is not None: + pretend = opts.pretend else: - path_formats = ui.get_path_formats() - - if not opts.format: - opts.format = self.config['format'].get(unicode).lower() - - pretend = opts.pretend if opts.pretend is not None else \ - self.config['pretend'].get(bool) - - if not pretend: - ui.commands.list_items(lib, ui.decargs(args), opts.album) - - if not (opts.yes or ui.input_yn(u"Convert? (Y/n)")): - return + pretend = self.config['pretend'].get(bool) if opts.album: albums = lib.albums(ui.decargs(args)) - items = (i for a in albums for i in a.items()) - if self.config['copy_album_art']: - for album in albums: - self.copy_album_art(album, opts.dest, path_formats, - pretend) + items = [i for a in albums for i in a.items()] + if not pretend: + for a in albums: + ui.print_(format(a, u'')) else: - items = iter(lib.items(ui.decargs(args))) - convert = [self.convert_item(opts.dest, + items = list(lib.items(ui.decargs(args))) + if not pretend: + for i in items: + ui.print_(format(i, u'')) + + if not items: + self._log.error(u'Empty query result.') + return + if not (pretend or opts.yes or ui.input_yn(u"Convert? 
(Y/n)")): + return + + if opts.album and self.config['copy_album_art']: + for album in albums: + self.copy_album_art(album, dest, path_formats, pretend) + + convert = [self.convert_item(dest, opts.keep_new, path_formats, - opts.format, + fmt, pretend) - for _ in range(opts.threads)] - pipe = util.pipeline.Pipeline([items, convert]) + for _ in range(threads)] + pipe = util.pipeline.Pipeline([iter(items), convert]) pipe.run_parallel() def convert_on_import(self, lib, item): """Transcode a file automatically after it is imported into the library. """ - fmt = self.config['format'].get(unicode).lower() + fmt = self.config['format'].as_str().lower() if should_transcode(item, fmt): command, ext = get_format() # Create a temporary file for the conversion. tmpdir = self.config['tmpdir'].get() - fd, dest = tempfile.mkstemp('.' + ext, dir=tmpdir) + if tmpdir: + tmpdir = util.py3_path(util.bytestring_path(tmpdir)) + fd, dest = tempfile.mkstemp(util.py3_path(b'.' + ext), dir=tmpdir) os.close(fd) dest = util.bytestring_path(dest) _temp_files.append(dest) # Delete the transcode later. diff --git a/libs/beetsplug/cue.py b/libs/beetsplug/cue.py index 63051bfc..fd564b55 100644 --- a/libs/beetsplug/cue.py +++ b/libs/beetsplug/cue.py @@ -35,7 +35,7 @@ class CuePlugin(BeetsPlugin): return if len(cues) > 1: self._log.info(u"Found multiple cue files doing nothing: {0}", - map(displayable_path, cues)) + list(map(displayable_path, cues))) cue_file = cues[0] self._log.info("Found {} for {}", displayable_path(cue_file), item) diff --git a/libs/beetsplug/discogs.py b/libs/beetsplug/discogs.py index 62a78a5f..eeb87d31 100644 --- a/libs/beetsplug/discogs.py +++ b/libs/beetsplug/discogs.py @@ -19,31 +19,28 @@ discogs-client library. 
from __future__ import division, absolute_import, print_function import beets.ui -from beets import logging from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance from beets.plugins import BeetsPlugin from beets.util import confit -from discogs_client import Release, Client +from discogs_client import Release, Master, Client from discogs_client.exceptions import DiscogsAPIError from requests.exceptions import ConnectionError +from six.moves import http_client import beets import re import time import json import socket -import httplib import os +import traceback +from string import ascii_lowercase -# Silence spurious INFO log lines generated by urllib3. -urllib3_logger = logging.getLogger('requests.packages.urllib3') -urllib3_logger.setLevel(logging.CRITICAL) - USER_AGENT = u'beets/{0} +http://beets.io/'.format(beets.__version__) # Exceptions that discogs_client should really handle but does not. -CONNECTION_ERRORS = (ConnectionError, socket.error, httplib.HTTPException, +CONNECTION_ERRORS = (ConnectionError, socket.error, http_client.HTTPException, ValueError, # JSON decoding raises a ValueError. DiscogsAPIError) @@ -57,17 +54,25 @@ class DiscogsPlugin(BeetsPlugin): 'apisecret': 'plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy', 'tokenfile': 'discogs_token.json', 'source_weight': 0.5, + 'user_token': '', }) self.config['apikey'].redact = True self.config['apisecret'].redact = True + self.config['user_token'].redact = True self.discogs_client = None self.register_listener('import_begin', self.setup) def setup(self, session=None): """Create the `discogs_client` field. Authenticate if necessary. """ - c_key = self.config['apikey'].get(unicode) - c_secret = self.config['apisecret'].get(unicode) + c_key = self.config['apikey'].as_str() + c_secret = self.config['apisecret'].as_str() + + # Try using a configured user token (bypassing OAuth login). 
+ user_token = self.config['user_token'].as_str() + if user_token: + self.discogs_client = Client(USER_AGENT, user_token=user_token) + return # Get the OAuth token from a file or log in. try: @@ -84,7 +89,7 @@ class DiscogsPlugin(BeetsPlugin): token, secret) def reset_auth(self): - """Delete toke file & redo the auth steps. + """Delete token file & redo the auth steps. """ os.remove(self._tokenfile()) self.setup() @@ -194,13 +199,13 @@ class DiscogsPlugin(BeetsPlugin): # cause a query to return no results, even if they match the artist or # album title. Use `re.UNICODE` flag to avoid stripping non-english # word characters. - # TEMPORARY: Encode as ASCII to work around a bug: + # FIXME: Encode as ASCII to work around a bug: # https://github.com/beetbox/beets/issues/1051 # When the library is fixed, we should encode as UTF-8. query = re.sub(r'(?u)\W+', ' ', query).encode('ascii', "replace") # Strip medium information from query, Things like "CD1" and "disk 1" # can also negate an otherwise positive result. - query = re.sub(r'(?i)\b(CD|disc)\s*\d+', '', query) + query = re.sub(br'(?i)\b(CD|disc)\s*\d+', b'', query) try: releases = self.discogs_client.search(query, type='release').page(1) @@ -208,11 +213,48 @@ class DiscogsPlugin(BeetsPlugin): self._log.debug(u"Communication error while searching for {0!r}", query, exc_info=True) return [] - return [self.get_album_info(release) for release in releases[:5]] + return [album for album in map(self.get_album_info, releases[:5]) + if album] + + def get_master_year(self, master_id): + """Fetches a master release given its Discogs ID and returns its year + or None if the master release is not found. 
+ """ + self._log.debug(u'Searching for master release {0}', master_id) + result = Master(self.discogs_client, {'id': master_id}) + try: + year = result.fetch('year') + return year + except DiscogsAPIError as e: + if e.status_code != 404: + self._log.debug(u'API Error: {0} (query: {1})', e, result._uri) + if e.status_code == 401: + self.reset_auth() + return self.get_master_year(master_id) + return None + except CONNECTION_ERRORS: + self._log.debug(u'Connection error in master release lookup', + exc_info=True) + return None def get_album_info(self, result): """Returns an AlbumInfo object for a discogs Release object. """ + # Explicitly reload the `Release` fields, as they might not be yet + # present if the result is from a `discogs_client.search()`. + if not result.data.get('artists'): + result.refresh() + + # Sanity check for required fields. The list of required fields is + # defined at Guideline 1.3.1.a, but in practice some releases might be + # lacking some of these fields. This function expects at least: + # `artists` (>0), `title`, `id`, `tracklist` (>0) + # https://www.discogs.com/help/doc/submission-guidelines-general-rules + if not all([result.data.get(k) for k in ['artists', 'title', 'id', + 'tracklist']]): + self._log.warn(u"Release does not contain the required fields") + return None + artist, artist_id = self.get_artist([a.data for a in result.artists]) album = re.sub(r' +', ' ', result.title) album_id = result.data['id'] @@ -221,28 +263,53 @@ class DiscogsPlugin(BeetsPlugin): # information and leave us with skeleton `Artist` objects that will # each make an API call just to get the same data back. 
tracks = self.get_tracks(result.data['tracklist']) - albumtype = ', '.join( - result.data['formats'][0].get('descriptions', [])) or None - va = result.data['artists'][0]['name'].lower() == 'various' - if va: - artist = config['va_name'].get(unicode) - year = result.data['year'] - label = result.data['labels'][0]['name'] - mediums = len(set(t.medium for t in tracks)) - catalogno = result.data['labels'][0]['catno'] - if catalogno == 'none': - catalogno = None + + # Extract information for the optional AlbumInfo fields, if possible. + va = result.data['artists'][0].get('name', '').lower() == 'various' + year = result.data.get('year') + mediums = [t.medium for t in tracks] country = result.data.get('country') - media = result.data['formats'][0]['name'] - data_url = result.data['uri'] + data_url = result.data.get('uri') + + # Extract information for the optional AlbumInfo fields that are + # contained on nested discogs fields. + albumtype = media = label = catalogno = None + if result.data.get('formats'): + albumtype = ', '.join( + result.data['formats'][0].get('descriptions', [])) or None + media = result.data['formats'][0]['name'] + if result.data.get('labels'): + label = result.data['labels'][0].get('name') + catalogno = result.data['labels'][0].get('catno') + + # Additional cleanups (various artists name, catalog number, media). + if va: + artist = config['va_name'].as_str() + if catalogno == 'none': + catalogno = None + # Explicitly set the `media` for the tracks, since it is expected by + # `autotag.apply_metadata`, and set `medium_total`. + for track in tracks: + track.media = media + track.medium_total = mediums.count(track.medium) + # Discogs does not have track IDs. Invent our own IDs as proposed + # in #2336. + track.track_id = str(album_id) + "-" + track.track_alt + + # Retrieve master release id (returns None if there isn't one). 
+ master_id = result.data.get('master_id') + # Assume `original_year` is equal to `year` for releases without + # a master release, otherwise fetch the master release. + original_year = self.get_master_year(master_id) if master_id else year + return AlbumInfo(album, album_id, artist, artist_id, tracks, asin=None, albumtype=albumtype, va=va, year=year, month=None, - day=None, label=label, mediums=mediums, - artist_sort=None, releasegroup_id=None, + day=None, label=label, mediums=len(set(mediums)), + artist_sort=None, releasegroup_id=master_id, catalognum=catalogno, script=None, language=None, country=country, albumstatus=None, media=media, albumdisambig=None, artist_credit=None, - original_year=None, original_month=None, + original_year=original_year, original_month=None, original_day=None, data_source='Discogs', data_url=data_url) @@ -269,38 +336,71 @@ class DiscogsPlugin(BeetsPlugin): def get_tracks(self, tracklist): """Returns a list of TrackInfo objects for a discogs tracklist. """ + try: + clean_tracklist = self.coalesce_tracks(tracklist) + except Exception as exc: + # FIXME: this is an extra precaution for making sure there are no + # side effects after #2222. It should be removed after further + # testing. + self._log.debug(u'{}', traceback.format_exc()) + self._log.error(u'uncaught exception in coalesce_tracks: {}', exc) + clean_tracklist = tracklist tracks = [] index_tracks = {} index = 0 - for track in tracklist: + for track in clean_tracklist: # Only real tracks have `position`. Otherwise, it's an index track. if track['position']: index += 1 - tracks.append(self.get_track_info(track, index)) + track_info = self.get_track_info(track, index) + track_info.track_alt = track['position'] + tracks.append(track_info) else: index_tracks[index + 1] = track['title'] # Fix up medium and medium_index for each track. Discogs position is # unreliable, but tracks are in order. 
medium = None - medium_count, index_count = 0, 0 + medium_count, index_count, side_count = 0, 0, 0 + sides_per_medium = 1 + + # If a medium has two sides (ie. vinyl or cassette), each pair of + # consecutive sides should belong to the same medium. + if all([track.medium is not None for track in tracks]): + m = sorted(set([track.medium.lower() for track in tracks])) + # If all track.medium are single consecutive letters, assume it is + # a 2-sided medium. + if ''.join(m) in ascii_lowercase: + sides_per_medium = 2 + for track in tracks: # Handle special case where a different medium does not indicate a # new disc, when there is no medium_index and the ordinal of medium # is not sequential. For example, I, II, III, IV, V. Assume these # are the track index, not the medium. + # side_count is the number of mediums or medium sides (in the case + # of two-sided mediums) that were seen before. medium_is_index = track.medium and not track.medium_index and ( len(track.medium) != 1 or - ord(track.medium) - 64 != medium_count + 1 + # Not within standard incremental medium values (A, B, C, ...). + ord(track.medium) - 64 != side_count + 1 ) if not medium_is_index and medium != track.medium: - # Increment medium_count and reset index_count when medium - # changes. + side_count += 1 + if sides_per_medium == 2: + if side_count % sides_per_medium: + # Two-sided medium changed. Reset index_count. + index_count = 0 + medium_count += 1 + else: + # Medium changed. Reset index_count. + medium_count += 1 + index_count = 0 medium = track.medium - medium_count += 1 - index_count = 0 + index_count += 1 + medium_count = 1 if medium_count == 0 else medium_count track.medium, track.medium_index = medium_count, index_count # Get `disctitle` from Discogs index tracks. Assume that an index track @@ -315,30 +415,122 @@ class DiscogsPlugin(BeetsPlugin): return tracks + def coalesce_tracks(self, raw_tracklist): + """Pre-process a tracklist, merging subtracks into a single track. 
The + title for the merged track is the one from the previous index track, + if present; otherwise it is a combination of the subtracks titles. + """ + def add_merged_subtracks(tracklist, subtracks): + """Modify `tracklist` in place, merging a list of `subtracks` into + a single track into `tracklist`.""" + # Calculate position based on first subtrack, without subindex. + idx, medium_idx, sub_idx = \ + self.get_track_index(subtracks[0]['position']) + position = '%s%s' % (idx or '', medium_idx or '') + + if tracklist and not tracklist[-1]['position']: + # Assume the previous index track contains the track title. + if sub_idx: + # "Convert" the track title to a real track, discarding the + # subtracks assuming they are logical divisions of a + # physical track (12.2.9 Subtracks). + tracklist[-1]['position'] = position + else: + # Promote the subtracks to real tracks, discarding the + # index track, assuming the subtracks are physical tracks. + index_track = tracklist.pop() + # Fix artists when they are specified on the index track. + if index_track.get('artists'): + for subtrack in subtracks: + if not subtrack.get('artists'): + subtrack['artists'] = index_track['artists'] + tracklist.extend(subtracks) + else: + # Merge the subtracks, pick a title, and append the new track. + track = subtracks[0].copy() + track['title'] = ' / '.join([t['title'] for t in subtracks]) + tracklist.append(track) + + # Pre-process the tracklist, trying to identify subtracks. + subtracks = [] + tracklist = [] + prev_subindex = '' + for track in raw_tracklist: + # Regular subtrack (track with subindex). + if track['position']: + _, _, subindex = self.get_track_index(track['position']) + if subindex: + if subindex.rjust(len(raw_tracklist)) > prev_subindex: + # Subtrack still part of the current main track. + subtracks.append(track) + else: + # Subtrack part of a new group (..., 1.3, *2.1*, ...). 
+ add_merged_subtracks(tracklist, subtracks) + subtracks = [track] + prev_subindex = subindex.rjust(len(raw_tracklist)) + continue + + # Index track with nested sub_tracks. + if not track['position'] and 'sub_tracks' in track: + # Append the index track, assuming it contains the track title. + tracklist.append(track) + add_merged_subtracks(tracklist, track['sub_tracks']) + continue + + # Regular track or index track without nested sub_tracks. + if subtracks: + add_merged_subtracks(tracklist, subtracks) + subtracks = [] + prev_subindex = '' + tracklist.append(track) + + # Merge and add the remaining subtracks, if any. + if subtracks: + add_merged_subtracks(tracklist, subtracks) + + return tracklist + def get_track_info(self, track, index): """Returns a TrackInfo object for a discogs track. """ title = track['title'] track_id = None - medium, medium_index = self.get_track_index(track['position']) + medium, medium_index, _ = self.get_track_index(track['position']) artist, artist_id = self.get_artist(track.get('artists', [])) length = self.get_track_length(track['duration']) - return TrackInfo(title, track_id, artist, artist_id, length, index, - medium, medium_index, artist_sort=None, - disctitle=None, artist_credit=None) + return TrackInfo(title, track_id, artist=artist, artist_id=artist_id, + length=length, index=index, + medium=medium, medium_index=medium_index, + artist_sort=None, disctitle=None, artist_credit=None) def get_track_index(self, position): - """Returns the medium and medium index for a discogs track position. - """ - # medium_index is a number at the end of position. medium is everything - # else. E.g. (A)(1), (Side A, Track )(1), (A)(), ()(1), etc. - match = re.match(r'^(.*?)(\d*)$', position.upper()) + """Returns the medium, medium index and subtrack index for a discogs + track position.""" + # Match the standard Discogs positions (12.2.9), which can have several + # forms (1, 1-1, A1, A1.1, A1a, ...). 
+ match = re.match( + r'^(.*?)' # medium: everything before medium_index. + r'(\d*?)' # medium_index: a number at the end of + # `position`, except if followed by a subtrack + # index. + # subtrack_index: can only be matched if medium + # or medium_index have been matched, and can be + r'((?<=\w)\.[\w]+' # - a dot followed by a string (A.1, 2.A) + r'|(?<=\d)[A-Z]+' # - a string that follows a number (1A, B2a) + r')?' + r'$', + position.upper() + ) + if match: - medium, index = match.groups() + medium, index, subindex = match.groups() + + if subindex and subindex.startswith('.'): + subindex = subindex[1:] else: self._log.debug(u'Invalid position: {0}', position) - medium = index = None - return medium or None, index or None + medium = index = subindex = None + return medium or None, index or None, subindex or None def get_track_length(self, duration): """Returns the track length in seconds for a discogs duration. diff --git a/libs/beetsplug/duplicates.py b/libs/beetsplug/duplicates.py index 4f039717..b316cfda 100644 --- a/libs/beetsplug/duplicates.py +++ b/libs/beetsplug/duplicates.py @@ -20,9 +20,11 @@ from __future__ import division, absolute_import, print_function import shlex from beets.plugins import BeetsPlugin -from beets.ui import decargs, print_, vararg_callback, Subcommand, UserError -from beets.util import command_output, displayable_path, subprocess +from beets.ui import decargs, print_, Subcommand, UserError +from beets.util import command_output, displayable_path, subprocess, \ + bytestring_path, MoveOperation from beets.library import Item, Album +import six PLUGIN = 'duplicates' @@ -79,10 +81,9 @@ class DuplicatesPlugin(BeetsPlugin): help=u'report duplicates only if all attributes are set', ) self._command.parser.add_option( - u'-k', u'--keys', dest='keys', - action='callback', metavar='KEY1 KEY2', - callback=vararg_callback, - help=u'report duplicates based on keys', + u'-k', u'--key', dest='keys', + action='append', metavar='KEY', + help=u'report 
duplicates based on keys (use multiple times)', ) self._command.parser.add_option( u'-M', u'--merge', dest='merge', @@ -112,14 +113,14 @@ class DuplicatesPlugin(BeetsPlugin): self.config.set_args(opts) album = self.config['album'].get(bool) checksum = self.config['checksum'].get(str) - copy = self.config['copy'].get(str) + copy = bytestring_path(self.config['copy'].as_str()) count = self.config['count'].get(bool) delete = self.config['delete'].get(bool) fmt = self.config['format'].get(str) full = self.config['full'].get(bool) - keys = self.config['keys'].get(list) + keys = self.config['keys'].as_str_seq() merge = self.config['merge'].get(bool) - move = self.config['move'].get(str) + move = bytestring_path(self.config['move'].as_str()) path = self.config['path'].get(bool) tiebreak = self.config['tiebreak'].get(dict) strict = self.config['strict'].get(bool) @@ -135,15 +136,15 @@ class DuplicatesPlugin(BeetsPlugin): items = lib.items(decargs(args)) if path: - fmt = '$path' + fmt = u'$path' # Default format string for count mode. if count and not fmt: if album: - fmt = '$albumartist - $album' + fmt = u'$albumartist - $album' else: - fmt = '$albumartist - $album - $title' - fmt += ': {0}' + fmt = u'$albumartist - $album - $title' + fmt += u': {0}' if checksum: for i in items: @@ -169,22 +170,22 @@ class DuplicatesPlugin(BeetsPlugin): return [self._command] def _process_item(self, item, copy=False, move=False, delete=False, - tag=False, fmt=''): + tag=False, fmt=u''): """Process Item `item`. 
""" print_(format(item, fmt)) if copy: - item.move(basedir=copy, copy=True) + item.move(basedir=copy, operation=MoveOperation.COPY) item.store() if move: - item.move(basedir=move, copy=False) + item.move(basedir=move) item.store() if delete: item.remove(delete=True) if tag: try: k, v = tag.split('=') - except: + except Exception: raise UserError( u"{}: can't parse k=v tag: {}".format(PLUGIN, tag) ) @@ -252,20 +253,19 @@ class DuplicatesPlugin(BeetsPlugin): "completeness" (objects with more non-null fields come first) and Albums are ordered by their track count. """ - if tiebreak: - kind = 'items' if all(isinstance(o, Item) - for o in objs) else 'albums' + kind = 'items' if all(isinstance(o, Item) for o in objs) else 'albums' + + if tiebreak and kind in tiebreak.keys(): key = lambda x: tuple(getattr(x, k) for k in tiebreak[kind]) else: - kind = Item if all(isinstance(o, Item) for o in objs) else Album - if kind is Item: + if kind == 'items': def truthy(v): # Avoid a Unicode warning by avoiding comparison # between a bytes object and the empty Unicode # string ''. return v is not None and \ - (v != '' if isinstance(v, unicode) else True) - fields = kind.all_keys() + (v != '' if isinstance(v, six.text_type) else True) + fields = Item.all_keys() key = lambda x: sum(1 for f in fields if truthy(getattr(x, f))) else: key = lambda x: len(x.items()) @@ -311,7 +311,7 @@ class DuplicatesPlugin(BeetsPlugin): objs[0], displayable_path(o.path), displayable_path(missing.destination())) - missing.move(copy=True) + missing.move(operation=MoveOperation.COPY) return objs def _merge(self, objs): @@ -329,7 +329,7 @@ class DuplicatesPlugin(BeetsPlugin): """Generate triples of keys, duplicate counts, and constituent objects. 
""" offset = 0 if full else 1 - for k, objs in self._group_by(objs, keys, strict).iteritems(): + for k, objs in self._group_by(objs, keys, strict).items(): if len(objs) > 1: objs = self._order(objs, tiebreak) if merge: diff --git a/libs/beetsplug/edit.py b/libs/beetsplug/edit.py index 5c7796ee..631a1b58 100644 --- a/libs/beetsplug/edit.py +++ b/libs/beetsplug/edit.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016 # @@ -22,11 +23,12 @@ from beets import ui from beets.dbcore import types from beets.importer import action from beets.ui.commands import _do_query, PromptChoice -from copy import deepcopy +import codecs import subprocess import yaml from tempfile import NamedTemporaryFile import os +import six # These "safe" types can avoid the format/parse cycle that most fields go @@ -82,7 +84,7 @@ def load(s): # Convert all keys to strings. They started out as strings, # but the user may have inadvertently messed this up. - out.append({unicode(k): v for k, v in d.items()}) + out.append({six.text_type(k): v for k, v in d.items()}) except yaml.YAMLError as e: raise ParseError(u'invalid YAML: {}'.format(e)) @@ -141,7 +143,7 @@ def apply_(obj, data): else: # Either the field was stringified originally or the user changed # it from a safe type to an unsafe one. Parse it as a string. - obj.set_parse(key, unicode(value)) + obj.set_parse(key, six.text_type(value)) class EditPlugin(plugins.BeetsPlugin): @@ -242,9 +244,15 @@ class EditPlugin(plugins.BeetsPlugin): old_data = [flatten(o, fields) for o in objs] # Set up a temporary file with the initial data for editing. 
- new = NamedTemporaryFile(suffix='.yaml', delete=False) + if six.PY2: + new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) + else: + new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, + encoding='utf-8') old_str = dump(old_data) new.write(old_str) + if six.PY2: + old_str = old_str.decode('utf-8') new.close() # Loop until we have parseable data and the user confirms. @@ -255,7 +263,7 @@ class EditPlugin(plugins.BeetsPlugin): # Read the data back after editing and check whether anything # changed. - with open(new.name) as f: + with codecs.open(new.name, encoding='utf-8') as f: new_str = f.read() if new_str == old_str: ui.print_(u"No changes; aborting.") @@ -274,7 +282,7 @@ class EditPlugin(plugins.BeetsPlugin): # Show the changes. # If the objects are not on the DB yet, we need a copy of their # original state for show_model_changes. - objs_old = [deepcopy(obj) if not obj._db else None + objs_old = [obj.copy() if obj.id < 0 else None for obj in objs] self.apply_data(objs, old_data, new_data) changed = False @@ -293,9 +301,13 @@ class EditPlugin(plugins.BeetsPlugin): elif choice == u'c': # Cancel. return False elif choice == u'e': # Keep editing. - # Reset the temporary changes to the objects. + # Reset the temporary changes to the objects. I we have a + # copy from above, use that, else reload from the database. + objs = [(old_obj or obj) + for old_obj, obj in zip(objs_old, objs)] for obj in objs: - obj.read() + if not obj.id < 0: + obj.load() continue # Remove the temporary file before returning. @@ -310,8 +322,8 @@ class EditPlugin(plugins.BeetsPlugin): are temporary. 
""" if len(old_data) != len(new_data): - self._log.warn(u'number of objects changed from {} to {}', - len(old_data), len(new_data)) + self._log.warning(u'number of objects changed from {} to {}', + len(old_data), len(new_data)) obj_by_id = {o.id: o for o in objs} ignore_fields = self.config['ignore_fields'].as_str_seq() @@ -321,7 +333,7 @@ class EditPlugin(plugins.BeetsPlugin): forbidden = False for key in ignore_fields: if old_dict.get(key) != new_dict.get(key): - self._log.warn(u'ignoring object whose {} changed', key) + self._log.warning(u'ignoring object whose {} changed', key) forbidden = True break if forbidden: @@ -356,9 +368,13 @@ class EditPlugin(plugins.BeetsPlugin): """Callback for invoking the functionality during an interactive import session on the *original* item tags. """ - # Assign temporary ids to the Items. - for i, obj in enumerate(task.items): - obj.id = i + 1 + # Assign negative temporary ids to Items that are not in the database + # yet. By using negative values, no clash with items in the database + # can occur. + for i, obj in enumerate(task.items, start=1): + # The importer may set the id to None when re-importing albums. + if not obj._db or obj.id is None: + obj.id = -i # Present the YAML to the user and let her change it. fields = self._get_fields(album=False, extra=[]) @@ -366,7 +382,8 @@ class EditPlugin(plugins.BeetsPlugin): # Remove temporary ids. for obj in task.items: - obj.id = None + if obj.id < 0: + obj.id = None # Save the new data. 
if success: diff --git a/libs/beetsplug/embedart.py b/libs/beetsplug/embedart.py index 71b05f37..afe8f86f 100644 --- a/libs/beetsplug/embedart.py +++ b/libs/beetsplug/embedart.py @@ -20,13 +20,35 @@ import os.path from beets.plugins import BeetsPlugin from beets import ui -from beets.ui import decargs +from beets.ui import print_, decargs from beets.util import syspath, normpath, displayable_path, bytestring_path from beets.util.artresizer import ArtResizer from beets import config from beets import art +def _confirm(objs, album): + """Show the list of affected objects (items or albums) and confirm + that the user wants to modify their artwork. + + `album` is a Boolean indicating whether these are albums (as opposed + to items). + """ + noun = u'album' if album else u'file' + prompt = u'Modify artwork for {} {}{} (Y/n)?'.format( + len(objs), + noun, + u's' if len(objs) > 1 else u'' + ) + + # Show all the items or albums. + for obj in objs: + print_(format(obj)) + + # Confirm with user. + return ui.input_yn(prompt) + + class EmbedCoverArtPlugin(BeetsPlugin): """Allows albumart to be embedded into the actual files. """ @@ -60,6 +82,9 @@ class EmbedCoverArtPlugin(BeetsPlugin): embed_cmd.parser.add_option( u'-f', u'--file', metavar='PATH', help=u'the image file to embed' ) + embed_cmd.parser.add_option( + u"-y", u"--yes", action="store_true", help=u"skip confirmation" + ) maxwidth = self.config['maxwidth'].get(int) compare_threshold = self.config['compare_threshold'].get(int) ifempty = self.config['ifempty'].get(bool) @@ -71,11 +96,24 @@ class EmbedCoverArtPlugin(BeetsPlugin): raise ui.UserError(u'image file {0} not found'.format( displayable_path(imagepath) )) - for item in lib.items(decargs(args)): + + items = lib.items(decargs(args)) + + # Confirm with user. 
+ if not opts.yes and not _confirm(items, not opts.file): + return + + for item in items: art.embed_item(self._log, item, imagepath, maxwidth, None, compare_threshold, ifempty) else: - for album in lib.albums(decargs(args)): + albums = lib.albums(decargs(args)) + + # Confirm with user. + if not opts.yes and not _confirm(albums, not opts.file): + return + + for album in albums: art.embed_album(self._log, album, maxwidth, False, compare_threshold, ifempty) self.remove_artfile(album) @@ -107,7 +145,7 @@ class EmbedCoverArtPlugin(BeetsPlugin): else: filename = bytestring_path(opts.filename or config['art_filename'].get()) - if os.path.dirname(filename) != '': + if os.path.dirname(filename) != b'': self._log.error( u"Only specify a name rather than a path for -n") return @@ -125,8 +163,15 @@ class EmbedCoverArtPlugin(BeetsPlugin): 'clearart', help=u'remove images from file metadata', ) + clear_cmd.parser.add_option( + u"-y", u"--yes", action="store_true", help=u"skip confirmation" + ) def clear_func(lib, opts, args): + items = lib.items(decargs(args)) + # Confirm with user. + if not opts.yes and not _confirm(items, False): + return art.clear(self._log, lib, decargs(args)) clear_cmd.func = clear_func diff --git a/libs/beetsplug/embyupdate.py b/libs/beetsplug/embyupdate.py index 38f8929e..5c731954 100644 --- a/libs/beetsplug/embyupdate.py +++ b/libs/beetsplug/embyupdate.py @@ -6,22 +6,51 @@ host: localhost port: 8096 username: user + apikey: apikey password: password """ from __future__ import division, absolute_import, print_function -from beets import config -from beets.plugins import BeetsPlugin -from urllib import urlencode -from urlparse import urljoin, parse_qs, urlsplit, urlunsplit import hashlib import requests +from six.moves.urllib.parse import urlencode +from six.moves.urllib.parse import urljoin, parse_qs, urlsplit, urlunsplit + +from beets import config +from beets.plugins import BeetsPlugin + def api_url(host, port, endpoint): """Returns a joined url. 
+ + Takes host, port and endpoint and generates a valid emby API url. + + :param host: Hostname of the emby server + :param port: Portnumber of the emby server + :param endpoint: API endpoint + :type host: str + :type port: int + :type endpoint: str + :returns: Full API url + :rtype: str """ - joined = urljoin('http://{0}:{1}'.format(host, port), endpoint) + # check if http or https is defined as host and create hostname + hostname_list = [host] + if host.startswith('http://') or host.startswith('https://'): + hostname = ''.join(hostname_list) + else: + hostname_list.insert(0, 'http://') + hostname = ''.join(hostname_list) + + joined = urljoin( + '{hostname}:{port}'.format( + hostname=hostname, + port=port + ), + endpoint + ) + scheme, netloc, path, query_string, fragment = urlsplit(joined) query_params = parse_qs(query_string) @@ -33,34 +62,62 @@ def api_url(host, port, endpoint): def password_data(username, password): """Returns a dict with username and its encoded password. + + :param username: Emby username + :param password: Emby password + :type username: str + :type password: str + :returns: Dictionary with username and encoded password + :rtype: dict """ return { 'username': username, - 'password': hashlib.sha1(password).hexdigest(), - 'passwordMd5': hashlib.md5(password).hexdigest() + 'password': hashlib.sha1(password.encode('utf-8')).hexdigest(), + 'passwordMd5': hashlib.md5(password.encode('utf-8')).hexdigest() } def create_headers(user_id, token=None): """Return header dict that is needed to talk to the Emby API. 
+ + :param user_id: Emby user ID + :param token: Authentication token for Emby + :type user_id: str + :type token: str + :returns: Headers for requests + :rtype: dict """ - headers = { - 'Authorization': 'MediaBrowser', - 'UserId': user_id, - 'Client': 'other', - 'Device': 'empy', - 'DeviceId': 'beets', - 'Version': '0.0.0' - } + headers = {} + + authorization = ( + 'MediaBrowser UserId="{user_id}", ' + 'Client="other", ' + 'Device="beets", ' + 'DeviceId="beets", ' + 'Version="0.0.0"' + ).format(user_id=user_id) + + headers['x-emby-authorization'] = authorization if token: - headers['X-MediaBrowser-Token'] = token + headers['x-mediabrowser-token'] = token return headers def get_token(host, port, headers, auth_data): """Return token for a user. + + :param host: Emby host + :param port: Emby port + :param headers: Headers for requests + :param auth_data: Username and encoded password for authentication + :type host: str + :type port: int + :type headers: dict + :type auth_data: dict + :returns: Access Token + :rtype: str """ url = api_url(host, port, '/Users/AuthenticateByName') r = requests.post(url, headers=headers, data=auth_data) @@ -70,6 +127,15 @@ def get_token(host, port, headers, auth_data): def get_user(host, port, username): """Return user dict from server or None if there is no user. + + :param host: Emby host + :param port: Emby port + :username: Username + :type host: str + :type port: int + :type username: str + :returns: Matched Users + :rtype: list """ url = api_url(host, port, '/Users/Public') r = requests.get(url) @@ -84,8 +150,10 @@ class EmbyUpdate(BeetsPlugin): # Adding defaults. 
config['emby'].add({ - u'host': u'localhost', - u'port': 8096 + u'host': u'http://localhost', + u'port': 8096, + u'apikey': None, + u'password': None, }) self.register_listener('database_change', self.listen_for_db_change) @@ -104,6 +172,12 @@ class EmbyUpdate(BeetsPlugin): port = config['emby']['port'].get() username = config['emby']['username'].get() password = config['emby']['password'].get() + token = config['emby']['apikey'].get() + + # Check if at least a apikey or password is given. + if not any([password, token]): + self._log.warning(u'Provide at least Emby password or apikey.') + return # Get user information from the Emby API. user = get_user(host, port, username) @@ -111,17 +185,18 @@ class EmbyUpdate(BeetsPlugin): self._log.warning(u'User {0} could not be found.'.format(username)) return - # Create Authentication data and headers. - auth_data = password_data(username, password) - headers = create_headers(user[0]['Id']) - - # Get authentication token. - token = get_token(host, port, headers, auth_data) if not token: - self._log.warning( - u'Could not get token for user {0}', username - ) - return + # Create Authentication data and headers. + auth_data = password_data(username, password) + headers = create_headers(user[0]['Id']) + + # Get authentication token. + token = get_token(host, port, headers, auth_data) + if not token: + self._log.warning( + u'Could not get token for user {0}', username + ) + return # Recreate headers with a token. 
headers = create_headers(user[0]['Id'], token=token) diff --git a/libs/beetsplug/fetchart.py b/libs/beetsplug/fetchart.py index 2cc362e4..0e106694 100644 --- a/libs/beetsplug/fetchart.py +++ b/libs/beetsplug/fetchart.py @@ -29,8 +29,11 @@ from beets import importer from beets import ui from beets import util from beets import config +from beets.mediafile import image_mime_type from beets.util.artresizer import ArtResizer from beets.util import confit +from beets.util import syspath, bytestring_path, py3_path +import six try: import itunes @@ -38,9 +41,11 @@ try: except ImportError: HAVE_ITUNES = False -IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg'] -CONTENT_TYPES = ('image/jpeg', 'image/png') -DOWNLOAD_EXTENSION = '.jpg' +CONTENT_TYPES = { + 'image/jpeg': [b'jpg', b'jpeg'], + 'image/png': [b'png'] +} +IMAGE_EXTENSIONS = [ext for exts in CONTENT_TYPES.values() for ext in exts] class Candidate(object): @@ -64,7 +69,7 @@ class Candidate(object): self.match = match self.size = size - def _validate(self, extra): + def _validate(self, plugin): """Determine whether the candidate artwork is valid based on its dimensions (width and ratio). @@ -75,9 +80,7 @@ class Candidate(object): if not self.path: return self.CANDIDATE_BAD - if not (extra['enforce_ratio'] or - extra['minwidth'] or - extra['maxwidth']): + if not (plugin.enforce_ratio or plugin.minwidth or plugin.maxwidth): return self.CANDIDATE_EXACT # get_size returns None if no local imaging backend is available @@ -96,22 +99,22 @@ class Candidate(object): long_edge = max(self.size) # Check minimum size. - if extra['minwidth'] and self.size[0] < extra['minwidth']: + if plugin.minwidth and self.size[0] < plugin.minwidth: self._log.debug(u'image too small ({} < {})', - self.size[0], extra['minwidth']) + self.size[0], plugin.minwidth) return self.CANDIDATE_BAD # Check aspect ratio. 
edge_diff = long_edge - short_edge - if extra['enforce_ratio']: - if extra['margin_px']: - if edge_diff > extra['margin_px']: + if plugin.enforce_ratio: + if plugin.margin_px: + if edge_diff > plugin.margin_px: self._log.debug(u'image is not close enough to being ' u'square, ({} - {} > {})', - long_edge, short_edge, extra['margin_px']) + long_edge, short_edge, plugin.margin_px) return self.CANDIDATE_BAD - elif extra['margin_percent']: - margin_px = extra['margin_percent'] * long_edge + elif plugin.margin_percent: + margin_px = plugin.margin_percent * long_edge if edge_diff > margin_px: self._log.debug(u'image is not close enough to being ' u'square, ({} - {} > {})', @@ -124,20 +127,20 @@ class Candidate(object): return self.CANDIDATE_BAD # Check maximum size. - if extra['maxwidth'] and self.size[0] > extra['maxwidth']: + if plugin.maxwidth and self.size[0] > plugin.maxwidth: self._log.debug(u'image needs resizing ({} > {})', - self.size[0], extra['maxwidth']) + self.size[0], plugin.maxwidth) return self.CANDIDATE_DOWNSCALE return self.CANDIDATE_EXACT - def validate(self, extra): - self.check = self._validate(extra) + def validate(self, plugin): + self.check = self._validate(plugin) return self.check - def resize(self, extra): - if extra['maxwidth'] and self.check == self.CANDIDATE_DOWNSCALE: - self.path = ArtResizer.shared.resize(extra['maxwidth'], self.path) + def resize(self, plugin): + if plugin.maxwidth and self.check == self.CANDIDATE_DOWNSCALE: + self.path = ArtResizer.shared.resize(plugin.maxwidth, self.path) def _logged_get(log, *args, **kwargs): @@ -189,17 +192,20 @@ class RequestMixin(object): # ART SOURCES ################################################################ class ArtSource(RequestMixin): - def __init__(self, log, config): + VALID_MATCHING_CRITERIA = ['default'] + + def __init__(self, log, config, match_by=None): self._log = log self._config = config + self.match_by = match_by or self.VALID_MATCHING_CRITERIA - def get(self, album, extra): + 
def get(self, album, plugin, paths): raise NotImplementedError() def _candidate(self, **kwargs): return Candidate(source=self, log=self._log, **kwargs) - def fetch_image(self, candidate, extra): + def fetch_image(self, candidate, plugin): raise NotImplementedError() @@ -207,7 +213,7 @@ class LocalArtSource(ArtSource): IS_LOCAL = True LOC_STR = u'local' - def fetch_image(self, candidate, extra): + def fetch_image(self, candidate, plugin): pass @@ -215,58 +221,94 @@ class RemoteArtSource(ArtSource): IS_LOCAL = False LOC_STR = u'remote' - def fetch_image(self, candidate, extra): + def fetch_image(self, candidate, plugin): """Downloads an image from a URL and checks whether it seems to actually be an image. If so, returns a path to the downloaded image. Otherwise, returns None. """ - if extra['maxwidth']: - candidate.url = ArtResizer.shared.proxy_url(extra['maxwidth'], + if plugin.maxwidth: + candidate.url = ArtResizer.shared.proxy_url(plugin.maxwidth, candidate.url) try: with closing(self.request(candidate.url, stream=True, message=u'downloading image')) as resp: - if 'Content-Type' not in resp.headers \ - or resp.headers['Content-Type'] not in CONTENT_TYPES: - self._log.debug( - u'not a supported image: {}', - resp.headers.get('Content-Type') or u'no content type', - ) - candidate.path = None + ct = resp.headers.get('Content-Type', None) + + # Download the image to a temporary file. As some servers + # (notably fanart.tv) have proven to return wrong Content-Types + # when images were uploaded with a bad file extension, do not + # rely on it. Instead validate the type using the file magic + # and only then determine the extension. + data = resp.iter_content(chunk_size=1024) + header = b'' + for chunk in data: + header += chunk + if len(header) >= 32: + # The imghdr module will only read 32 bytes, and our + # own additions in mediafile even less. + break + else: + # server didn't return enough data, i.e. 
corrupt image return - # Generate a temporary file with the correct extension. - with NamedTemporaryFile(suffix=DOWNLOAD_EXTENSION, - delete=False) as fh: - for chunk in resp.iter_content(chunk_size=1024): + real_ct = image_mime_type(header) + if real_ct is None: + # detection by file magic failed, fall back to the + # server-supplied Content-Type + # Is our type detection failsafe enough to drop this? + real_ct = ct + + if real_ct not in CONTENT_TYPES: + self._log.debug(u'not a supported image: {}', + real_ct or u'unknown content type') + return + + ext = b'.' + CONTENT_TYPES[real_ct][0] + + if real_ct != ct: + self._log.warning(u'Server specified {}, but returned a ' + u'{} image. Correcting the extension ' + u'to {}', + ct, real_ct, ext) + + suffix = py3_path(ext) + with NamedTemporaryFile(suffix=suffix, delete=False) as fh: + # write the first already loaded part of the image + fh.write(header) + # download the remaining part of the image + for chunk in data: fh.write(chunk) self._log.debug(u'downloaded art to: {0}', util.displayable_path(fh.name)) - candidate.path = fh.name + candidate.path = util.bytestring_path(fh.name) return except (IOError, requests.RequestException, TypeError) as exc: # Handling TypeError works around a urllib3 bug: # https://github.com/shazow/urllib3/issues/556 self._log.debug(u'error fetching art: {}', exc) - candidate.path = None return class CoverArtArchive(RemoteArtSource): NAME = u"Cover Art Archive" + VALID_MATCHING_CRITERIA = ['release', 'releasegroup'] - URL = 'http://coverartarchive.org/release/{mbid}/front' - GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front' + if util.SNI_SUPPORTED: + URL = 'https://coverartarchive.org/release/{mbid}/front' + GROUP_URL = 'https://coverartarchive.org/release-group/{mbid}/front' + else: + URL = 'http://coverartarchive.org/release/{mbid}/front' + GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front' - def get(self, album, extra): + def get(self, album, plugin, 
paths): """Return the Cover Art Archive and Cover Art Archive release group URLs using album MusicBrainz release ID and release group ID. """ - if album.mb_albumid: + if 'release' in self.match_by and album.mb_albumid: yield self._candidate(url=self.URL.format(mbid=album.mb_albumid), match=Candidate.MATCH_EXACT) - if album.mb_releasegroupid: + if 'releasegroup' in self.match_by and album.mb_releasegroupid: yield self._candidate( url=self.GROUP_URL.format(mbid=album.mb_releasegroupid), match=Candidate.MATCH_FALLBACK) @@ -277,7 +319,7 @@ class Amazon(RemoteArtSource): URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' INDICES = (1, 2) - def get(self, album, extra): + def get(self, album, plugin, paths): """Generate URLs using Amazon ID (ASIN) string. """ if album.asin: @@ -291,7 +333,7 @@ class AlbumArtOrg(RemoteArtSource): URL = 'http://www.albumart.org/index_detail.php' PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' - def get(self, album, extra): + def get(self, album, plugin, paths): """Return art URL from AlbumArt.org using album ASIN. """ if not album.asin: @@ -322,7 +364,7 @@ class GoogleImages(RemoteArtSource): self.key = self._config['google_key'].get(), self.cx = self._config['google_engine'].get(), - def get(self, album, extra): + def get(self, album, plugin, paths): """Return art URL from google custom search engine given an album title and interpreter. 
""" @@ -358,8 +400,7 @@ class GoogleImages(RemoteArtSource): class FanartTV(RemoteArtSource): """Art from fanart.tv requested using their API""" NAME = u"fanart.tv" - - API_URL = 'http://webservice.fanart.tv/v3/' + API_URL = 'https://webservice.fanart.tv/v3/' API_ALBUMS = API_URL + 'music/albums/' PROJECT_KEY = '61a7d0ab4e67162b7a0c7c35915cd48e' @@ -367,7 +408,7 @@ class FanartTV(RemoteArtSource): super(FanartTV, self).__init__(*args, **kwargs) self.client_key = self._config['fanarttv_key'].get() - def get(self, album, extra): + def get(self, album, plugin, paths): if not album.mb_releasegroupid: return @@ -418,7 +459,7 @@ class FanartTV(RemoteArtSource): class ITunesStore(RemoteArtSource): NAME = u"iTunes Store" - def get(self, album, extra): + def get(self, album, plugin, paths): """Return art URL from iTunes Store given an album title. """ if not (album.albumartist and album.album): @@ -452,8 +493,8 @@ class ITunesStore(RemoteArtSource): class Wikipedia(RemoteArtSource): NAME = u"Wikipedia (queried through DBpedia)" - DBPEDIA_URL = 'http://dbpedia.org/sparql' - WIKIPEDIA_URL = 'http://en.wikipedia.org/w/api.php' + DBPEDIA_URL = 'https://dbpedia.org/sparql' + WIKIPEDIA_URL = 'https://en.wikipedia.org/w/api.php' SPARQL_QUERY = u'''PREFIX rdf: PREFIX dbpprop: PREFIX owl: @@ -476,7 +517,7 @@ class Wikipedia(RemoteArtSource): }} Limit 1''' - def get(self, album, extra): + def get(self, album, plugin, paths): if not (album.albumartist and album.album): return @@ -566,7 +607,7 @@ class Wikipedia(RemoteArtSource): try: data = wikipedia_response.json() results = data['query']['pages'] - for _, result in results.iteritems(): + for _, result in results.items(): image_url = result['imageinfo'][0]['url'] yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) @@ -588,26 +629,26 @@ class FileSystem(LocalArtSource): """ return [idx for (idx, x) in enumerate(cover_names) if x in filename] - def get(self, album, extra): + def get(self, album, plugin, paths): """Look 
for album art files in the specified directories. """ - paths = extra['paths'] if not paths: return - cover_names = extra['cover_names'] - cover_pat = br"(\b|_)({0})(\b|_)".format(b'|'.join(cover_names)) - cautious = extra['cautious'] + cover_names = list(map(util.bytestring_path, plugin.cover_names)) + cover_names_str = b'|'.join(cover_names) + cover_pat = br''.join([br"(\b|_)(", cover_names_str, br")(\b|_)"]) for path in paths: - if not os.path.isdir(path): + if not os.path.isdir(syspath(path)): continue # Find all files that look like images in the directory. images = [] - for fn in os.listdir(path): + for fn in os.listdir(syspath(path)): + fn = bytestring_path(fn) for ext in IMAGE_EXTENSIONS: - if fn.lower().endswith(b'.' + ext.encode('utf8')) and \ - os.path.isfile(os.path.join(path, fn)): + if fn.lower().endswith(b'.' + ext) and \ + os.path.isfile(syspath(os.path.join(path, fn))): images.append(fn) # Look for "preferred" filenames. @@ -625,7 +666,7 @@ class FileSystem(LocalArtSource): remaining.append(fn) # Fall back to any image in the folder. 
- if remaining and not cautious: + if remaining and not plugin.cautious: self._log.debug(u'using fallback art file {0}', util.displayable_path(remaining[0])) yield self._candidate(path=os.path.join(path, remaining[0]), @@ -691,7 +732,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): confit.String(pattern=self.PAT_PERCENT)])) self.margin_px = None self.margin_percent = None - if type(self.enforce_ratio) is unicode: + if type(self.enforce_ratio) is six.text_type: if self.enforce_ratio[-1] == u'%': self.margin_percent = float(self.enforce_ratio[:-1]) / 100 elif self.enforce_ratio[-2:] == u'px': @@ -702,7 +743,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): self.enforce_ratio = True cover_names = self.config['cover_names'].as_str_seq() - self.cover_names = map(util.bytestring_path, cover_names) + self.cover_names = list(map(util.bytestring_path, cover_names)) self.cautious = self.config['cautious'].get(bool) self.store_source = self.config['store_source'].get(bool) @@ -720,20 +761,30 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if not self.config['google_key'].get() and \ u'google' in available_sources: available_sources.remove(u'google') - sources_name = plugins.sanitize_choices( - self.config['sources'].as_str_seq(), available_sources) + available_sources = [(s, c) + for s in available_sources + for c in ART_SOURCES[s].VALID_MATCHING_CRITERIA] + sources = plugins.sanitize_pairs( + self.config['sources'].as_pairs(default_value='*'), + available_sources) + if 'remote_priority' in self.config: self._log.warning( u'The `fetch_art.remote_priority` configuration option has ' - u'been deprecated, see the documentation.') + u'been deprecated. 
Instead, place `filesystem` at the end of ' + u'your `sources` list.') if self.config['remote_priority'].get(bool): - try: - sources_name.remove(u'filesystem') - sources_name.append(u'filesystem') - except ValueError: - pass - self.sources = [ART_SOURCES[s](self._log, self.config) - for s in sources_name] + fs = [] + others = [] + for s, c in sources: + if s == 'filesystem': + fs.append((s, c)) + else: + others.append((s, c)) + sources = others + fs + + self.sources = [ART_SOURCES[s](self._log, self.config, match_by=[c]) + for s, c in sources] # Asynchronous; after music is added to the library. def fetch_art(self, session, task): @@ -745,7 +796,8 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if task.choice_flag == importer.action.ASIS: # For as-is imports, don't search Web sources for art. local = True - elif task.choice_flag == importer.action.APPLY: + elif task.choice_flag in (importer.action.APPLY, + importer.action.RETAG): # Search everywhere for art. local = False else: @@ -786,9 +838,15 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): action='store_true', default=False, help=u're-download art when already present' ) + cmd.parser.add_option( + u'-q', u'--quiet', dest='quiet', + action='store_true', default=False, + help=u'shows only quiet art' + ) def func(lib, opts, args): - self.batch_fetch_art(lib, lib.albums(ui.decargs(args)), opts.force) + self.batch_fetch_art(lib, lib.albums(ui.decargs(args)), opts.force, + opts.quiet) cmd.func = func return [cmd] @@ -803,16 +861,6 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): """ out = None - # all the information any of the sources might need - extra = {'paths': paths, - 'cover_names': self.cover_names, - 'cautious': self.cautious, - 'enforce_ratio': self.enforce_ratio, - 'margin_px': self.margin_px, - 'margin_percent': self.margin_percent, - 'minwidth': self.minwidth, - 'maxwidth': self.maxwidth} - for source in self.sources: if source.IS_LOCAL or not local_only: 
self._log.debug( @@ -822,9 +870,9 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): ) # URLs might be invalid at this point, or the image may not # fulfill the requirements - for candidate in source.get(album, extra): - source.fetch_image(candidate, extra) - if candidate.validate(extra): + for candidate in source.get(album, self, paths): + source.fetch_image(candidate, self) + if candidate.validate(self): out = candidate self._log.debug( u'using {0.LOC_STR} image {1}'.format( @@ -834,17 +882,20 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): break if out: - out.resize(extra) + out.resize(self) return out - def batch_fetch_art(self, lib, albums, force): + def batch_fetch_art(self, lib, albums, force, quiet): """Fetch album art for each of the albums. This implements the manual fetchart CLI command. """ for album in albums: if album.artpath and not force and os.path.isfile(album.artpath): - message = ui.colorize('text_highlight_minor', u'has album art') + if not quiet: + message = ui.colorize('text_highlight_minor', + u'has album art') + self._log.info(u'{0}: {1}', album, message) else: # In ordinary invocations, look for images on the # filesystem. 
When forcing, however, always go to the Web @@ -857,5 +908,4 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): message = ui.colorize('text_success', u'found album art') else: message = ui.colorize('text_error', u'no art found') - - self._log.info(u'{0}: {1}', album, message) + self._log.info(u'{0}: {1}', album, message) diff --git a/libs/beetsplug/filefilter.py b/libs/beetsplug/filefilter.py index 72b5ea9e..23dac574 100644 --- a/libs/beetsplug/filefilter.py +++ b/libs/beetsplug/filefilter.py @@ -20,6 +20,7 @@ from __future__ import division, absolute_import, print_function import re from beets import config +from beets.util import bytestring_path from beets.plugins import BeetsPlugin from beets.importer import SingletonImportTask @@ -35,14 +36,15 @@ class FileFilterPlugin(BeetsPlugin): self.path_album_regex = \ self.path_singleton_regex = \ - re.compile(self.config['path'].get()) + re.compile(bytestring_path(self.config['path'].get())) if 'album_path' in self.config: - self.path_album_regex = re.compile(self.config['album_path'].get()) + self.path_album_regex = re.compile( + bytestring_path(self.config['album_path'].get())) if 'singleton_path' in self.config: self.path_singleton_regex = re.compile( - self.config['singleton_path'].get()) + bytestring_path(self.config['singleton_path'].get())) def import_task_created_event(self, session, task): if task.items and len(task.items) > 0: @@ -69,6 +71,7 @@ class FileFilterPlugin(BeetsPlugin): of the file given in full_path. 
""" import_config = dict(config['import']) + full_path = bytestring_path(full_path) if 'singletons' not in import_config or not import_config[ 'singletons']: # Album diff --git a/libs/beetsplug/fromfilename.py b/libs/beetsplug/fromfilename.py index e9c49bee..56b68f75 100644 --- a/libs/beetsplug/fromfilename.py +++ b/libs/beetsplug/fromfilename.py @@ -22,34 +22,26 @@ from beets import plugins from beets.util import displayable_path import os import re +import six # Filename field extraction patterns. PATTERNS = [ - # "01 - Track 01" and "01": do nothing - r'^(\d+)\s*-\s*track\s*\d$', - r'^\d+$', - - # Useful patterns. - r'^(?P.+)-(?P.+)-(?P<tag>.*)$', - r'^(?P<track>\d+)\s*-(?P<artist>.+)-(?P<title>.+)-(?P<tag>.*)$', - r'^(?P<track>\d+)\s(?P<artist>.+)-(?P<title>.+)-(?P<tag>.*)$', - r'^(?P<artist>.+)-(?P<title>.+)$', - r'^(?P<track>\d+)\.\s*(?P<artist>.+)-(?P<title>.+)$', - r'^(?P<track>\d+)\s*-\s*(?P<artist>.+)-(?P<title>.+)$', - r'^(?P<track>\d+)\s*-(?P<artist>.+)-(?P<title>.+)$', - r'^(?P<track>\d+)\s(?P<artist>.+)-(?P<title>.+)$', - r'^(?P<title>.+)$', - r'^(?P<track>\d+)\.\s*(?P<title>.+)$', - r'^(?P<track>\d+)\s*-\s*(?P<title>.+)$', - r'^(?P<track>\d+)\s(?P<title>.+)$', - r'^(?P<title>.+) by (?P<artist>.+)$', + # Useful patterns. + r'^(?P<artist>.+)[\-_](?P<title>.+)[\-_](?P<tag>.*)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)[\-_](?P<tag>.*)$', + r'^(?P<artist>.+)[\-_](?P<title>.+)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)$', + r'^(?P<title>.+)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<title>.+)$', + r'^(?P<track>\d+)\s+(?P<title>.+)$', + r'^(?P<title>.+) by (?P<artist>.+)$', + r'^(?P<track>\d+).*$', ] # Titles considered "empty" and in need of replacement. BAD_TITLE_PATTERNS = [ r'^$', - r'\d+?\s?-?\s*track\s*\d+', ] @@ -100,7 +92,7 @@ def apply_matches(d): """Given a mapping from items to field dicts, apply the fields to the objects. 
""" - some_map = d.values()[0] + some_map = list(d.values())[0] keys = some_map.keys() # Only proceed if the "tag" field is equal across all filenames. @@ -132,7 +124,7 @@ def apply_matches(d): # Apply the title and track. for item in d: if bad_title(item.title): - item.title = unicode(d[item][title_field]) + item.title = six.text_type(d[item][title_field]) if 'track' in d[item] and item.track == 0: item.track = int(d[item]['track']) diff --git a/libs/beetsplug/ftintitle.py b/libs/beetsplug/ftintitle.py index eefdfcf1..9303f9cf 100644 --- a/libs/beetsplug/ftintitle.py +++ b/libs/beetsplug/ftintitle.py @@ -49,29 +49,28 @@ def find_feat_part(artist, albumartist): """Attempt to find featured artists in the item's artist fields and return the results. Returns None if no featured artist found. """ - feat_part = None - # Look for the album artist in the artist field. If it's not # present, give up. albumartist_split = artist.split(albumartist, 1) if len(albumartist_split) <= 1: - return feat_part + return None # If the last element of the split (the right-hand side of the # album artist) is nonempty, then it probably contains the # featured artist. - elif albumartist_split[-1] != '': + elif albumartist_split[1] != '': # Extract the featured artist from the right-hand side. - _, feat_part = split_on_feat(albumartist_split[-1]) + _, feat_part = split_on_feat(albumartist_split[1]) + return feat_part # Otherwise, if there's nothing on the right-hand side, look for a # featuring artist on the left-hand side. 
else: lhs, rhs = split_on_feat(albumartist_split[0]) if lhs: - feat_part = lhs + return lhs - return feat_part + return None class FtInTitlePlugin(plugins.BeetsPlugin): @@ -90,7 +89,7 @@ class FtInTitlePlugin(plugins.BeetsPlugin): self._command.parser.add_option( u'-d', u'--drop', dest='drop', - action='store_true', default=False, + action='store_true', default=None, help=u'drop featuring from artists and ignore title update') if self.config['auto']: @@ -137,7 +136,7 @@ class FtInTitlePlugin(plugins.BeetsPlugin): # Only update the title if it does not already contain a featured # artist and if we do not drop featuring information. if not drop_feat and not contains_feat(item.title): - feat_format = self.config['format'].get(unicode) + feat_format = self.config['format'].as_str() new_format = feat_format.format(feat_part) new_title = u"{0} {1}".format(item.title, new_format) self._log.info(u'title: {0} -> {1}', item.title, new_title) diff --git a/libs/beetsplug/fuzzy.py b/libs/beetsplug/fuzzy.py index 3decdc60..a7308a52 100644 --- a/libs/beetsplug/fuzzy.py +++ b/libs/beetsplug/fuzzy.py @@ -44,5 +44,5 @@ class FuzzyPlugin(BeetsPlugin): }) def queries(self): - prefix = self.config['prefix'].get(basestring) + prefix = self.config['prefix'].as_str() return {prefix: FuzzyQuery} diff --git a/libs/beetsplug/gmusic.py b/libs/beetsplug/gmusic.py new file mode 100644 index 00000000..259d2725 --- /dev/null +++ b/libs/beetsplug/gmusic.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2017, Tigran Kostandyan. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Upload files to Google Play Music and list songs in its library.""" + +from __future__ import absolute_import, division, print_function +import os.path + +from beets.plugins import BeetsPlugin +from beets import ui +from beets import config +from beets.ui import Subcommand +from gmusicapi import Musicmanager, Mobileclient +from gmusicapi.exceptions import NotLoggedIn +import gmusicapi.clients + + +class Gmusic(BeetsPlugin): + def __init__(self): + super(Gmusic, self).__init__() + # Checks for OAuth2 credentials, + # if they don't exist - performs authorization + self.m = Musicmanager() + if os.path.isfile(gmusicapi.clients.OAUTH_FILEPATH): + self.m.login() + else: + self.m.perform_oauth() + + def commands(self): + gupload = Subcommand('gmusic-upload', + help=u'upload your tracks to Google Play Music') + gupload.func = self.upload + + search = Subcommand('gmusic-songs', + help=u'list of songs in Google Play Music library' + ) + search.parser.add_option('-t', '--track', dest='track', + action='store_true', + help='Search by track name') + search.parser.add_option('-a', '--artist', dest='artist', + action='store_true', + help='Search by artist') + search.func = self.search + return [gupload, search] + + def upload(self, lib, opts, args): + items = lib.items(ui.decargs(args)) + files = [x.path.decode('utf-8') for x in items] + ui.print_(u'Uploading your files...') + 
self.m.upload(filepaths=files) + ui.print_(u'Your files were successfully added to library') + + def search(self, lib, opts, args): + password = config['gmusic']['password'] + email = config['gmusic']['email'] + password.redact = True + email.redact = True + # Since Musicmanager doesn't support library management + # we need to use mobileclient interface + mobile = Mobileclient() + try: + mobile.login(email.as_str(), password.as_str(), + Mobileclient.FROM_MAC_ADDRESS) + files = mobile.get_all_songs() + except NotLoggedIn: + ui.print_( + u'Authentication error. Please check your email and password.' + ) + return + if not args: + for i, file in enumerate(files, start=1): + print(i, ui.colorize('blue', file['artist']), + file['title'], ui.colorize('red', file['album'])) + else: + if opts.track: + self.match(files, args, 'title') + else: + self.match(files, args, 'artist') + + @staticmethod + def match(files, args, search_by): + for file in files: + if ' '.join(ui.decargs(args)) in file[search_by]: + print(file['artist'], file['title'], file['album']) diff --git a/libs/beetsplug/hook.py b/libs/beetsplug/hook.py index 4f2b8f0e..b6270fd5 100644 --- a/libs/beetsplug/hook.py +++ b/libs/beetsplug/hook.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2015, Adrian Sampson. # @@ -17,15 +18,20 @@ from __future__ import division, absolute_import, print_function import string import subprocess +import six from beets.plugins import BeetsPlugin -from beets.ui import _arg_encoding -from beets.util import shlex_split +from beets.util import shlex_split, arg_encoding class CodingFormatter(string.Formatter): - """A custom string formatter that decodes the format string and it's - fields. + """A variant of `string.Formatter` that converts everything to `unicode` + strings. + + This is necessary on Python 2, where formatting otherwise occurs on + bytestrings. 
It intercepts two points in the formatting process to decode + the format string and all fields using the specified encoding. If decoding + fails, the values are used as-is. """ def __init__(self, coding): @@ -57,10 +63,9 @@ class CodingFormatter(string.Formatter): """ converted = super(CodingFormatter, self).convert_field(value, conversion) - try: - converted = converted.decode(self._coding) - except UnicodeEncodeError: - pass + + if isinstance(converted, bytes): + return converted.decode(self._coding) return converted @@ -79,8 +84,8 @@ class HookPlugin(BeetsPlugin): for hook_index in range(len(hooks)): hook = self.config['hooks'][hook_index] - hook_event = hook['event'].get(unicode) - hook_command = hook['command'].get(unicode) + hook_event = hook['event'].as_str() + hook_command = hook['command'].as_str() self.create_and_register_hook(hook_event, hook_command) @@ -90,7 +95,12 @@ class HookPlugin(BeetsPlugin): self._log.error('invalid command "{0}"', command) return - formatter = CodingFormatter(_arg_encoding()) + # Use a string formatter that works on Unicode strings. 
+ if six.PY2: + formatter = CodingFormatter(arg_encoding()) + else: + formatter = string.Formatter() + command_pieces = shlex_split(command) for i, piece in enumerate(command_pieces): diff --git a/libs/beetsplug/importadded.py b/libs/beetsplug/importadded.py index 77c7e7ab..36407b14 100644 --- a/libs/beetsplug/importadded.py +++ b/libs/beetsplug/importadded.py @@ -30,12 +30,13 @@ class ImportAddedPlugin(BeetsPlugin): self.item_mtime = dict() register = self.register_listener - register('import_task_start', self.check_config) - register('import_task_start', self.record_if_inplace) + register('import_task_created', self.check_config) + register('import_task_created', self.record_if_inplace) register('import_task_files', self.record_reimported) register('before_item_moved', self.record_import_mtime) register('item_copied', self.record_import_mtime) register('item_linked', self.record_import_mtime) + register('item_hardlinked', self.record_import_mtime) register('album_imported', self.update_album_times) register('item_imported', self.update_item_times) register('after_write', self.update_after_write_time) @@ -51,7 +52,7 @@ class ImportAddedPlugin(BeetsPlugin): def record_if_inplace(self, task, session): if not (session.config['copy'] or session.config['move'] or - session.config['link']): + session.config['link'] or session.config['hardlink']): self._log.debug(u"In place import detected, recording mtimes from " u"source paths") items = [task.item] \ @@ -62,7 +63,7 @@ class ImportAddedPlugin(BeetsPlugin): def record_reimported(self, task, session): self.reimported_item_ids = set(item.id for item, replaced_items - in task.replaced_items.iteritems() + in task.replaced_items.items() if replaced_items) self.replaced_album_paths = set(task.replaced_albums.keys()) diff --git a/libs/beetsplug/importfeeds.py b/libs/beetsplug/importfeeds.py index d046ddc4..35ae2883 100644 --- a/libs/beetsplug/importfeeds.py +++ b/libs/beetsplug/importfeeds.py @@ -24,26 +24,12 @@ import os 
import re from beets.plugins import BeetsPlugin -from beets.util import mkdirall, normpath, syspath, bytestring_path +from beets.util import mkdirall, normpath, syspath, bytestring_path, link from beets import config M3U_DEFAULT_NAME = 'imported.m3u' -def _get_feeds_dir(lib): - """Given a Library object, return the path to the feeds directory to be - used (either in the library directory or an explicitly configured - path). Ensures that the directory exists. - """ - # Inside library directory. - dirpath = lib.directory - - # Ensure directory exists. - if not os.path.exists(syspath(dirpath)): - os.makedirs(syspath(dirpath)) - return dirpath - - def _build_m3u_filename(basename): """Builds unique m3u filename by appending given basename to current date.""" @@ -61,7 +47,7 @@ def _write_m3u(m3u_path, items_paths): """Append relative paths to items into m3u file. """ mkdirall(m3u_path) - with open(syspath(m3u_path), 'a') as f: + with open(syspath(m3u_path), 'ab') as f: for path in items_paths: f.write(path + b'\n') @@ -78,30 +64,28 @@ class ImportFeedsPlugin(BeetsPlugin): 'absolute_path': False, }) - feeds_dir = self.config['dir'].get() - if feeds_dir: - feeds_dir = os.path.expanduser(bytestring_path(feeds_dir)) - self.config['dir'] = feeds_dir - if not os.path.exists(syspath(feeds_dir)): - os.makedirs(syspath(feeds_dir)) - relative_to = self.config['relative_to'].get() if relative_to: self.config['relative_to'] = normpath(relative_to) else: - self.config['relative_to'] = feeds_dir + self.config['relative_to'] = self.get_feeds_dir() - self.register_listener('library_opened', self.library_opened) self.register_listener('album_imported', self.album_imported) self.register_listener('item_imported', self.item_imported) + def get_feeds_dir(self): + feeds_dir = self.config['dir'].get() + if feeds_dir: + return os.path.expanduser(bytestring_path(feeds_dir)) + return config['directory'].as_filename() + def _record_items(self, lib, basename, items): """Records relative paths to 
the given items for each feed format """ - feedsdir = bytestring_path(self.config['dir'].as_filename()) + feedsdir = bytestring_path(self.get_feeds_dir()) formats = self.config['formats'].as_str_seq() relative_to = self.config['relative_to'].get() \ - or self.config['dir'].as_filename() + or self.get_feeds_dir() relative_to = bytestring_path(relative_to) paths = [] @@ -119,7 +103,7 @@ class ImportFeedsPlugin(BeetsPlugin): if 'm3u' in formats: m3u_basename = bytestring_path( - self.config['m3u_name'].get(unicode)) + self.config['m3u_name'].as_str()) m3u_path = os.path.join(feedsdir, m3u_basename) _write_m3u(m3u_path, paths) @@ -131,17 +115,13 @@ class ImportFeedsPlugin(BeetsPlugin): for path in paths: dest = os.path.join(feedsdir, os.path.basename(path)) if not os.path.exists(syspath(dest)): - os.symlink(syspath(path), syspath(dest)) + link(path, dest) if 'echo' in formats: self._log.info(u"Location of imported music:") for path in paths: self._log.info(u" {0}", path) - def library_opened(self, lib): - if self.config['dir'].get() is None: - self.config['dir'] = _get_feeds_dir(lib) - def album_imported(self, lib, album): self._record_items(lib, album.album, album.items()) diff --git a/libs/beetsplug/info.py b/libs/beetsplug/info.py index 29bff7a2..0d40c597 100644 --- a/libs/beetsplug/info.py +++ b/libs/beetsplug/info.py @@ -73,7 +73,7 @@ def library_data_emitter(item): def update_summary(summary, tags): - for key, value in tags.iteritems(): + for key, value in tags.items(): if key not in summary: summary[key] = value elif summary[key] != value: @@ -96,7 +96,7 @@ def print_data(data, item=None, fmt=None): path = displayable_path(item.path) if item else None formatted = {} - for key, value in data.iteritems(): + for key, value in data.items(): if isinstance(value, list): formatted[key] = u'; '.join(value) if value is not None: @@ -123,7 +123,7 @@ def print_data_keys(data, item=None): """ path = displayable_path(item.path) if item else None formatted = [] - for key, 
value in data.iteritems(): + for key, value in data.items(): formatted.append(key) if len(formatted) == 0: @@ -204,7 +204,8 @@ class InfoPlugin(BeetsPlugin): if opts.keys_only: print_data_keys(data, item) else: - print_data(data, item, opts.format) + fmt = ui.decargs([opts.format])[0] if opts.format else None + print_data(data, item, fmt) first = False if opts.summarize: @@ -230,7 +231,7 @@ def make_key_filter(include): def filter_(data): filtered = dict() for key, value in data.items(): - if any(map(lambda m: m.match(key), matchers)): + if any([m.match(key) for m in matchers]): filtered[key] = value return filtered diff --git a/libs/beetsplug/inline.py b/libs/beetsplug/inline.py index 6e3771f2..fd0e9fc3 100644 --- a/libs/beetsplug/inline.py +++ b/libs/beetsplug/inline.py @@ -22,6 +22,7 @@ import itertools from beets.plugins import BeetsPlugin from beets import config +import six FUNC_NAME = u'__INLINE_FUNC__' @@ -32,7 +33,7 @@ class InlineError(Exception): def __init__(self, code, exc): super(InlineError, self).__init__( (u"error in inline path field code:\n" - u"%s\n%s: %s") % (code, type(exc).__name__, unicode(exc)) + u"%s\n%s: %s") % (code, type(exc).__name__, six.text_type(exc)) ) @@ -64,14 +65,14 @@ class InlinePlugin(BeetsPlugin): for key, view in itertools.chain(config['item_fields'].items(), config['pathfields'].items()): self._log.debug(u'adding item field {0}', key) - func = self.compile_inline(view.get(unicode), False) + func = self.compile_inline(view.as_str(), False) if func is not None: self.template_fields[key] = func # Album fields. 
for key, view in config['album_fields'].items(): self._log.debug(u'adding album field {0}', key) - func = self.compile_inline(view.get(unicode), True) + func = self.compile_inline(view.as_str(), True) if func is not None: self.album_template_fields[key] = func diff --git a/libs/beetsplug/ipfs.py b/libs/beetsplug/ipfs.py index 87a100b1..9a9d6aa5 100644 --- a/libs/beetsplug/ipfs.py +++ b/libs/beetsplug/ipfs.py @@ -272,9 +272,11 @@ class IPFSPlugin(BeetsPlugin): break except AttributeError: pass + item_path = os.path.basename(item.path).decode( + util._fsencoding(), 'ignore' + ) # Clear current path from item - item.path = '/ipfs/{0}/{1}'.format(album.ipfs, - os.path.basename(item.path)) + item.path = '/ipfs/{0}/{1}'.format(album.ipfs, item_path) item.id = None items.append(item) diff --git a/libs/beetsplug/keyfinder.py b/libs/beetsplug/keyfinder.py index b6131a4b..a3fbc821 100644 --- a/libs/beetsplug/keyfinder.py +++ b/libs/beetsplug/keyfinder.py @@ -48,18 +48,18 @@ class KeyFinderPlugin(BeetsPlugin): self.find_key(lib.items(ui.decargs(args)), write=ui.should_write()) def imported(self, session, task): - self.find_key(task.items) + self.find_key(task.imported_items()) def find_key(self, items, write=False): overwrite = self.config['overwrite'].get(bool) - bin = util.bytestring_path(self.config['bin'].get(unicode)) + bin = self.config['bin'].as_str() for item in items: if item['initial_key'] and not overwrite: continue try: - output = util.command_output([bin, b'-f', + output = util.command_output([bin, '-f', util.syspath(item.path)]) except (subprocess.CalledProcessError, OSError) as exc: self._log.error(u'execution failed: {0}', exc) @@ -73,7 +73,7 @@ class KeyFinderPlugin(BeetsPlugin): key_raw = output.rsplit(None, 1)[-1] try: - key = key_raw.decode('utf8') + key = util.text_string(key_raw) except UnicodeDecodeError: self._log.error(u'output is invalid UTF-8') continue diff --git a/libs/beetsplug/kodiupdate.py b/libs/beetsplug/kodiupdate.py new file mode 100644 
index 00000000..ce5cb478 --- /dev/null +++ b/libs/beetsplug/kodiupdate.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2017, Pauli Kettunen. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates a Kodi library whenever the beets library is changed. +This is based on the Plex Update plugin. + +Put something like the following in your config.yaml to configure: + kodi: + host: localhost + port: 8080 + user: user + pwd: secret +""" +from __future__ import division, absolute_import, print_function + +import requests +from beets import config +from beets.plugins import BeetsPlugin +import six + + +def update_kodi(host, port, user, password): + """Sends request to the Kodi api to start a library refresh. + """ + url = "http://{0}:{1}/jsonrpc".format(host, port) + + """Content-Type: application/json is mandatory + according to the kodi jsonrpc documentation""" + + headers = {'Content-Type': 'application/json'} + + # Create the payload. Id seems to be mandatory. + payload = {'jsonrpc': '2.0', 'method': 'AudioLibrary.Scan', 'id': 1} + r = requests.post( + url, + auth=(user, password), + json=payload, + headers=headers) + + return r + + +class KodiUpdate(BeetsPlugin): + def __init__(self): + super(KodiUpdate, self).__init__() + + # Adding defaults. 
+ config['kodi'].add({ + u'host': u'localhost', + u'port': 8080, + u'user': u'kodi', + u'pwd': u'kodi'}) + + config['kodi']['pwd'].redact = True + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update""" + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to Kodi server. + """ + self._log.info(u'Requesting a Kodi library update...') + + # Try to send update request. + try: + r = update_kodi( + config['kodi']['host'].get(), + config['kodi']['port'].get(), + config['kodi']['user'].get(), + config['kodi']['pwd'].get()) + r.raise_for_status() + + except requests.exceptions.RequestException as e: + self._log.warning(u'Kodi update failed: {0}', + six.text_type(e)) + return + + json = r.json() + if json.get('result') != 'OK': + self._log.warning(u'Kodi update failed: JSON response was {0!r}', + json) + return + + self._log.info(u'Kodi update triggered') diff --git a/libs/beetsplug/lastgenre/__init__.py b/libs/beetsplug/lastgenre/__init__.py index a4b8f062..4374310b 100644 --- a/libs/beetsplug/lastgenre/__init__.py +++ b/libs/beetsplug/lastgenre/__init__.py @@ -14,6 +14,7 @@ # included in all copies or substantial portions of the Software. from __future__ import division, absolute_import, print_function +import six """Gets genres for imported music based on Last.fm tags. 
@@ -24,6 +25,7 @@ The scraper script used is available here: https://gist.github.com/1241307 """ import pylast +import codecs import os import yaml import traceback @@ -71,7 +73,7 @@ def flatten_tree(elem, path, branches): for sub in elem: flatten_tree(sub, path, branches) else: - branches.append(path + [unicode(elem)]) + branches.append(path + [six.text_type(elem)]) def find_parents(candidate, branches): @@ -107,6 +109,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): 'force': True, 'auto': True, 'separator': u', ', + 'prefer_specific': False, }) self.setup() @@ -126,9 +129,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): wl_filename = WHITELIST if wl_filename: wl_filename = normpath(wl_filename) - with open(wl_filename, 'r') as f: + with open(wl_filename, 'rb') as f: for line in f: - line = line.decode('utf8').strip().lower() + line = line.decode('utf-8').strip().lower() if line and not line.startswith(u'#'): self.whitelist.add(line) @@ -139,7 +142,8 @@ class LastGenrePlugin(plugins.BeetsPlugin): c14n_filename = C14N_TREE if c14n_filename: c14n_filename = normpath(c14n_filename) - genres_tree = yaml.load(open(c14n_filename, 'r')) + with codecs.open(c14n_filename, 'r', encoding='utf-8') as f: + genres_tree = yaml.load(f) flatten_tree(genres_tree, [], self.c14n_branches) @property @@ -155,6 +159,25 @@ class LastGenrePlugin(plugins.BeetsPlugin): elif source == 'artist': return 'artist', + def _get_depth(self, tag): + """Find the depth of a tag in the genres tree. + """ + depth = None + for key, value in enumerate(self.c14n_branches): + if tag in value: + depth = value.index(tag) + break + return depth + + def _sort_by_depth(self, tags): + """Given a list of tags, sort the tags by their depths in the + genre tree. 
+ """ + depth_tag_pairs = [(self._get_depth(t), t) for t in tags] + depth_tag_pairs = [e for e in depth_tag_pairs if e[0] is not None] + depth_tag_pairs.sort(reverse=True) + return [p[1] for p in depth_tag_pairs] + def _resolve_genres(self, tags): """Given a list of strings, return a genre by joining them into a single string and (optionally) canonicalizing each. @@ -176,17 +199,24 @@ class LastGenrePlugin(plugins.BeetsPlugin): parents = [find_parents(tag, self.c14n_branches)[-1]] tags_all += parents - if len(tags_all) >= count: + # Stop if we have enough tags already, unless we need to find + # the most specific tag (instead of the most popular). + if (not self.config['prefer_specific'] and + len(tags_all) >= count): break tags = tags_all tags = deduplicate(tags) + # Sort the tags by specificity. + if self.config['prefer_specific']: + tags = self._sort_by_depth(tags) + # c14n only adds allowed genres but we may have had forbidden genres in # the original tags list tags = [x.title() for x in tags if self._is_allowed(x)] - return self.config['separator'].get(unicode).join( + return self.config['separator'].as_str().join( tags[:self.config['count'].get(int)] ) @@ -221,7 +251,8 @@ class LastGenrePlugin(plugins.BeetsPlugin): if any(not s for s in args): return None - key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args)) + key = u'{0}.{1}'.format(entity, + u'-'.join(six.text_type(a) for a in args)) if key in self._genre_cache: return self._genre_cache[key] else: @@ -297,7 +328,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): result = None if isinstance(obj, library.Item): result = self.fetch_artist_genre(obj) - elif obj.albumartist != config['va_name'].get(unicode): + elif obj.albumartist != config['va_name'].as_str(): result = self.fetch_album_artist_genre(obj) else: # For "Various Artists", pick the most popular track genre. 
@@ -400,7 +431,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): """ # Work around an inconsistency in pylast where # Album.get_top_tags() does not return TopItem instances. - # https://code.google.com/p/pylast/issues/detail?id=85 + # https://github.com/pylast/pylast/issues/86 if isinstance(obj, pylast.Album): obj = super(pylast.Album, obj) diff --git a/libs/beetsplug/lastimport.py b/libs/beetsplug/lastimport.py index 2d8cc700..d7b84b0a 100644 --- a/libs/beetsplug/lastimport.py +++ b/libs/beetsplug/lastimport.py @@ -23,7 +23,7 @@ from beets import config from beets import plugins from beets.dbcore import types -API_URL = 'http://ws.audioscrobbler.com/2.0/' +API_URL = 'https://ws.audioscrobbler.com/2.0/' class LastImportPlugin(plugins.BeetsPlugin): @@ -110,7 +110,7 @@ class CustomUser(pylast.User): def import_lastfm(lib, log): - user = config['lastfm']['user'].get(unicode) + user = config['lastfm']['user'].as_str() per_page = config['lastimport']['per_page'].get(int) if not user: @@ -192,7 +192,7 @@ def process_tracks(lib, tracks, log): total_fails = 0 log.info(u'Received {0} tracks in this page, processing...', total) - for num in xrange(0, total): + for num in range(0, total): song = None trackid = tracks[num]['mbid'].strip() artist = tracks[num]['artist'].get('name', '').strip() diff --git a/libs/beetsplug/lyrics.py b/libs/beetsplug/lyrics.py index b6936e1b..60f53759 100644 --- a/libs/beetsplug/lyrics.py +++ b/libs/beetsplug/lyrics.py @@ -19,14 +19,18 @@ from __future__ import absolute_import, division, print_function import difflib +import errno import itertools import json +import struct +import os.path import re import requests import unicodedata -import urllib +from unidecode import unidecode import warnings -from HTMLParser import HTMLParseError +import six +from six.moves import urllib try: from bs4 import SoupStrainer, BeautifulSoup @@ -40,9 +44,18 @@ try: except ImportError: HAS_LANGDETECT = False +try: + # PY3: HTMLParseError was removed in 3.5 as strict 
mode + # was deprecated in 3.3. + # https://docs.python.org/3.3/library/html.parser.html + from six.moves.html_parser import HTMLParseError +except ImportError: + class HTMLParseError(Exception): + pass + from beets import plugins from beets import ui - +import beets DIV_RE = re.compile(r'<(/?)div>?', re.I) COMMENT_RE = re.compile(r'<!--.*-->', re.S) @@ -62,20 +75,62 @@ URL_CHARACTERS = { u'\u2016': u'-', u'\u2026': u'...', } +USER_AGENT = 'beets/{}'.format(beets.__version__) + +# The content for the base index.rst generated in ReST mode. +REST_INDEX_TEMPLATE = u'''Lyrics +====== + +* :ref:`Song index <genindex>` +* :ref:`search` + +Artist index: + +.. toctree:: + :maxdepth: 1 + :glob: + + artists/* +''' + +# The content for the base conf.py generated. +REST_CONF_TEMPLATE = u'''# -*- coding: utf-8 -*- +master_doc = 'index' +project = u'Lyrics' +copyright = u'none' +author = u'Various Authors' +latex_documents = [ + (master_doc, 'Lyrics.tex', project, + author, 'manual'), +] +epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright +epub_exclude_files = ['search.html'] +epub_tocdepth = 1 +epub_tocdup = False +''' # Utilities. +def unichar(i): + try: + return six.unichr(i) + except ValueError: + return struct.pack('i', i).decode('utf-32') + def unescape(text): """Resolve &#xxx; HTML entities (and some others).""" if isinstance(text, bytes): - text = text.decode('utf8', 'ignore') + text = text.decode('utf-8', 'ignore') out = text.replace(u' ', u' ') def replchar(m): num = m.group(1) - return unichr(int(num)) + return unichar(int(num)) out = re.sub(u"&#(\d+);", replchar, out) return out @@ -93,7 +148,6 @@ def extract_text_in(html, starttag): """Extract the text from a <DIV> tag in the HTML starting with ``starttag``. Returns None if parsing fails. """ - # Strip off the leading text before opening tag. 
try: _, html = html.split(starttag, 1) @@ -134,30 +188,33 @@ def search_pairs(item): and featured artists from the strings and add them as candidates. The method also tries to split multiple titles separated with `/`. """ + def generate_alternatives(string, patterns): + """Generate string alternatives by extracting first matching group for + each given pattern. + """ + alternatives = [string] + for pattern in patterns: + match = re.search(pattern, string, re.IGNORECASE) + if match: + alternatives.append(match.group(1)) + return alternatives title, artist = item.title, item.artist - titles = [title] - artists = [artist] - # Remove any featuring artists from the artists name - pattern = r"(.*?) {0}".format(plugins.feat_tokens()) - match = re.search(pattern, artist, re.IGNORECASE) - if match: - artists.append(match.group(1)) + patterns = [ + # Remove any featuring artists from the artists name + r"(.*?) {0}".format(plugins.feat_tokens())] + artists = generate_alternatives(artist, patterns) - # Remove a parenthesized suffix from a title string. Common - # examples include (live), (remix), and (acoustic). - pattern = r"(.+?)\s+[(].*[)]$" - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) - - # Remove any featuring artists from the title - pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) - for title in titles[:]: - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) + patterns = [ + # Remove a parenthesized suffix from a title string. Common + # examples include (live), (remix), and (acoustic). + r"(.+?)\s+[(].*[)]$", + # Remove any featuring artists from the title + r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)), + # Remove part of title after colon ':' for songs with subtitles + r"(.+?)\s*:.*"] + titles = generate_alternatives(title, patterns) # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them. 
@@ -170,6 +227,24 @@ def search_pairs(item): return itertools.product(artists, multi_titles) +def slug(text): + """Make a URL-safe, human-readable version of the given text + + This will do the following: + + 1. decode unicode characters into ASCII + 2. shift everything to lowercase + 3. strip whitespace + 4. replace other non-word characters with dashes + 5. strip extra dashes + + This somewhat duplicates the :func:`Google.slugify` function but + slugify is not as generic as this one, which can be reused + elsewhere. + """ + return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-') + + class Backend(object): def __init__(self, config, log): self._log = log @@ -177,11 +252,11 @@ class Backend(object): @staticmethod def _encode(s): """Encode the string for inclusion in a URL""" - if isinstance(s, unicode): + if isinstance(s, six.text_type): for char, repl in URL_CHARACTERS.items(): s = s.replace(char, repl) - s = s.encode('utf8', 'ignore') - return urllib.quote(s) + s = s.encode('utf-8', 'ignore') + return urllib.parse.quote(s) def build_url(self, artist, title): return self.URL_PATTERN % (self._encode(artist.title()), @@ -198,7 +273,9 @@ class Backend(object): # We're not overly worried about the NSA MITMing our lyrics scraper with warnings.catch_warnings(): warnings.simplefilter('ignore') - r = requests.get(url, verify=False) + r = requests.get(url, verify=False, headers={ + 'User-Agent': USER_AGENT, + }) except requests.RequestException as exc: self._log.debug(u'lyrics request failed: {0}', exc) return @@ -218,12 +295,12 @@ class SymbolsReplaced(Backend): '>': 'Greater_Than', '#': 'Number_', r'[\[\{]': '(', - r'[\[\{]': ')' + r'[\]\}]': ')', } @classmethod def _encode(cls, s): - for old, new in cls.REPLACEMENTS.iteritems(): + for old, new in cls.REPLACEMENTS.items(): s = re.sub(old, new, s) return super(SymbolsReplaced, cls)._encode(s) @@ -238,104 +315,97 @@ class MusiXmatch(SymbolsReplaced): def fetch(self, artist, title): url = 
self.build_url(artist, title) + html = self.fetch_url(url) if not html: return - lyrics = extract_text_between(html, - '"body":', '"language":') - return lyrics.strip(',"').replace('\\n', '\n') + if "We detected that your IP is blocked" in html: + self._log.warning(u'we are blocked at MusixMatch: url %s failed' + % url) + return + html_part = html.split('<p class="mxm-lyrics__content')[-1] + lyrics = extract_text_between(html_part, '>', '</p>') + lyrics = lyrics.strip(',"').replace('\\n', '\n') + # another odd case: sometimes only that string remains, for + # missing songs. this seems to happen after being blocked + # above, when filling in the CAPTCHA. + if "Instant lyrics for all your music." in lyrics: + return + return lyrics class Genius(Backend): - """Fetch lyrics from Genius via genius-api.""" + """Fetch lyrics from Genius via genius-api. + + Simply adapted from + bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/ + """ + + base_url = "https://api.genius.com" + def __init__(self, config, log): super(Genius, self).__init__(config, log) - self.api_key = config['genius_api_key'].get(unicode) - self.headers = {'Authorization': "Bearer %s" % self.api_key} + self.api_key = config['genius_api_key'].as_str() + self.headers = { + 'Authorization': "Bearer %s" % self.api_key, + 'User-Agent': USER_AGENT, + } - def search_genius(self, artist, title): - query = u"%s %s" % (artist, title) - url = u'https://api.genius.com/search?q=%s' \ - % (urllib.quote(query.encode('utf8'))) + def lyrics_from_song_api_path(self, song_api_path): + song_url = self.base_url + song_api_path + response = requests.get(song_url, headers=self.headers) + json = response.json() + path = json["response"]["song"]["path"] - self._log.debug(u'genius: requesting search {}', url) + # Gotta go regular html scraping... come on Genius. 
+ page_url = "https://genius.com" + path try: - req = requests.get( - url, - headers=self.headers, - allow_redirects=True - ) - req.raise_for_status() + page = requests.get(page_url) except requests.RequestException as exc: - self._log.debug(u'genius: request error: {}', exc) + self._log.debug(u'Genius page request for {0} failed: {1}', + page_url, exc) return None + html = BeautifulSoup(page.text, "html.parser") - try: - return req.json() - except ValueError: - self._log.debug(u'genius: invalid response: {}', req.text) - return None + # Remove script tags that they put in the middle of the lyrics. + [h.extract() for h in html('script')] - def get_lyrics(self, link): - url = u'http://genius-api.com/api/lyricsInfo' + # At least Genius is nice and has a tag called 'lyrics'! + # Updated css where the lyrics are based in HTML. + lyrics = html.find("div", class_="lyrics").get_text() - self._log.debug(u'genius: requesting lyrics for link {}', link) - try: - req = requests.post( - url, - data={'link': link}, - headers=self.headers, - allow_redirects=True - ) - req.raise_for_status() - except requests.RequestException as exc: - self._log.debug(u'genius: request error: {}', exc) - return None - - try: - return req.json() - except ValueError: - self._log.debug(u'genius: invalid response: {}', req.text) - return None - - def build_lyric_string(self, lyrics): - if 'lyrics' not in lyrics: - return - sections = lyrics['lyrics']['sections'] - - lyrics_list = [] - for section in sections: - lyrics_list.append(section['name']) - lyrics_list.append('\n') - for verse in section['verses']: - if 'content' in verse: - lyrics_list.append(verse['content']) - - return ''.join(lyrics_list) + return lyrics def fetch(self, artist, title): - search_data = self.search_genius(artist, title) - if not search_data: - return + search_url = self.base_url + "/search" + data = {'q': title} + try: + response = requests.get(search_url, data=data, + headers=self.headers) + except requests.RequestException 
as exc: + self._log.debug(u'Genius API request failed: {0}', exc) + return None - if not search_data['meta']['status'] == 200: - return - else: - records = search_data['response']['hits'] - if not records: - return + try: + json = response.json() + except ValueError: + self._log.debug(u'Genius API request returned invalid JSON') + return None - record_url = records[0]['result']['url'] - lyric_data = self.get_lyrics(record_url) - if not lyric_data: - return - lyrics = self.build_lyric_string(lyric_data) + song_info = None + for hit in json["response"]["hits"]: + if hit["result"]["primary_artist"]["name"] == artist: + song_info = hit + break - return lyrics + if song_info: + song_api_path = song_info["result"]["api_path"] + return self.lyrics_from_song_api_path(song_api_path) class LyricsWiki(SymbolsReplaced): """Fetch lyrics from LyricsWiki.""" + URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' def fetch(self, artist, title): @@ -354,38 +424,6 @@ class LyricsWiki(SymbolsReplaced): return lyrics -class LyricsCom(Backend): - """Fetch lyrics from Lyrics.com.""" - URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html' - NOT_FOUND = ( - 'Sorry, we do not have the lyric', - 'Submit Lyrics', - ) - - @classmethod - def _encode(cls, s): - s = re.sub(r'[^\w\s-]', '', s) - s = re.sub(r'\s+', '-', s) - return super(LyricsCom, cls)._encode(s).lower() - - def fetch(self, artist, title): - url = self.build_url(artist, title) - html = self.fetch_url(url) - if not html: - return - lyrics = extract_text_between(html, '<div id="lyrics" class="SCREENO' - 'NLY" itemprop="description">', '</div>') - if not lyrics: - return - for not_found_str in self.NOT_FOUND: - if not_found_str in lyrics: - return - - parts = lyrics.split('\n---\nLyrics powered by', 1) - if parts: - return parts[0] - - def remove_credits(text): """Remove first/last line of text if it contains the word 'lyrics' eg 'Lyrics by songsdatabase.com' @@ -459,10 +497,11 @@ def scrape_lyrics_from_html(html): class Google(Backend): 
"""Fetch lyrics from Google search results.""" + def __init__(self, config, log): super(Google, self).__init__(config, log) - self.api_key = config['google_API_key'].get(unicode) - self.engine_id = config['google_engine_ID'].get(unicode) + self.api_key = config['google_API_key'].as_str() + self.engine_id = config['google_engine_ID'].as_str() def is_lyrics(self, text, artist=None): """Determine whether the text seems to be valid lyrics. @@ -503,7 +542,7 @@ class Google(Backend): try: text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore') - text = unicode(re.sub('[-\s]+', ' ', text)) + text = six.text_type(re.sub('[-\s]+', ' ', text.decode('utf-8'))) except UnicodeDecodeError: self._log.exception(u"Failing to normalize '{0}'", text) return text @@ -542,14 +581,20 @@ class Google(Backend): query = u"%s %s" % (artist, title) url = u'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' \ % (self.api_key, self.engine_id, - urllib.quote(query.encode('utf8'))) + urllib.parse.quote(query.encode('utf-8'))) - data = urllib.urlopen(url) - data = json.load(data) + data = self.fetch_url(url) + if not data: + self._log.debug(u'google backend returned no data') + return None + try: + data = json.loads(data) + except ValueError as exc: + self._log.debug(u'google backend returned malformed JSON: {}', exc) if 'error' in data: reason = data['error']['errors'][0]['reason'] - self._log.debug(u'google lyrics backend error: {0}', reason) - return + self._log.debug(u'google backend error: {0}', reason) + return None if 'items' in data.keys(): for item in data['items']: @@ -570,11 +615,10 @@ class Google(Backend): class LyricsPlugin(plugins.BeetsPlugin): - SOURCES = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch'] + SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius'] SOURCE_BACKENDS = { 'google': Google, 'lyricwiki': LyricsWiki, - 'lyrics.com': LyricsCom, 'musixmatch': MusiXmatch, 'genius': Genius, } @@ -594,6 +638,7 @@ class 
LyricsPlugin(plugins.BeetsPlugin): "76V-uFL5jks5dNvcGCdarqFjDhP9c", 'fallback': None, 'force': False, + 'local': False, 'sources': self.SOURCES, }) self.config['bing_client_secret'].redact = True @@ -601,30 +646,49 @@ class LyricsPlugin(plugins.BeetsPlugin): self.config['google_engine_ID'].redact = True self.config['genius_api_key'].redact = True + # State information for the ReST writer. + # First, the current artist we're writing. + self.artist = u'Unknown artist' + # The current album: False means no album yet. + self.album = False + # The current rest file content. None means the file is not + # open yet. + self.rest = None + available_sources = list(self.SOURCES) sources = plugins.sanitize_choices( self.config['sources'].as_str_seq(), available_sources) if 'google' in sources: if not self.config['google_API_key'].get(): - self._log.warn(u'To use the google lyrics source, you must ' - u'provide an API key in the configuration. ' - u'See the documentation for further details.') + # We log a *debug* message here because the default + # configuration includes `google`. This way, the source + # is silent by default but can be enabled just by + # setting an API key. + self._log.debug(u'Disabling google source: ' + u'no API key configured.') sources.remove('google') - if not HAS_BEAUTIFUL_SOUP: - self._log.warn(u'To use the google lyrics source, you must ' - u'install the beautifulsoup4 module. See the ' - u'documentation for further details.') + elif not HAS_BEAUTIFUL_SOUP: + self._log.warning(u'To use the google lyrics source, you must ' + u'install the beautifulsoup4 module. See ' + u'the documentation for further details.') sources.remove('google') + if 'genius' in sources and not HAS_BEAUTIFUL_SOUP: + self._log.debug( + u'The Genius backend requires BeautifulSoup, which is not ' + u'installed, so the source is disabled.' 
+ ) + sources.remove('genius') + self.config['bing_lang_from'] = [ x.lower() for x in self.config['bing_lang_from'].as_str_seq()] self.bing_auth_token = None if not HAS_LANGDETECT and self.config['bing_client_secret'].get(): - self._log.warn(u'To use bing translations, you need to ' - u'install the langdetect module. See the ' - u'documentation for further details.') + self._log.warning(u'To use bing translations, you need to ' + u'install the langdetect module. See the ' + u'documentation for further details.') self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log) for source in sources] @@ -633,14 +697,14 @@ class LyricsPlugin(plugins.BeetsPlugin): params = { 'client_id': 'beets', 'client_secret': self.config['bing_client_secret'], - 'scope': 'http://api.microsofttranslator.com', + 'scope': "https://api.microsofttranslator.com", 'grant_type': 'client_credentials', } oauth_url = 'https://datamarket.accesscontrol.windows.net/v2/OAuth2-13' oauth_token = json.loads(requests.post( oauth_url, - data=urllib.urlencode(params)).content) + data=urllib.parse.urlencode(params)).content) if 'access_token' in oauth_token: return "Bearer " + oauth_token['access_token'] else: @@ -654,27 +718,106 @@ class LyricsPlugin(plugins.BeetsPlugin): action='store_true', default=False, help=u'print lyrics to console', ) + cmd.parser.add_option( + u'-r', u'--write-rest', dest='writerest', + action='store', default=None, metavar='dir', + help=u'write lyrics to given directory as ReST files', + ) cmd.parser.add_option( u'-f', u'--force', dest='force_refetch', action='store_true', default=False, help=u'always re-download lyrics', ) + cmd.parser.add_option( + u'-l', u'--local', dest='local_only', + action='store_true', default=False, + help=u'do not fetch missing lyrics', + ) def func(lib, opts, args): # The "write to files" option corresponds to the # import_write config value. 
write = ui.should_write() + if opts.writerest: + self.writerest_indexes(opts.writerest) for item in lib.items(ui.decargs(args)): - self.fetch_item_lyrics( - lib, item, write, - opts.force_refetch or self.config['force'], - ) - if opts.printlyr and item.lyrics: - ui.print_(item.lyrics) - + if not opts.local_only and not self.config['local']: + self.fetch_item_lyrics( + lib, item, write, + opts.force_refetch or self.config['force'], + ) + if item.lyrics: + if opts.printlyr: + ui.print_(item.lyrics) + if opts.writerest: + self.writerest(opts.writerest, item) + if opts.writerest: + # flush last artist + self.writerest(opts.writerest, None) + ui.print_(u'ReST files generated. to build, use one of:') + ui.print_(u' sphinx-build -b html %s _build/html' + % opts.writerest) + ui.print_(u' sphinx-build -b epub %s _build/epub' + % opts.writerest) + ui.print_((u' sphinx-build -b latex %s _build/latex ' + u'&& make -C _build/latex all-pdf') + % opts.writerest) cmd.func = func return [cmd] + def writerest(self, directory, item): + """Write the item to an ReST file + + This will keep state (in the `rest` variable) in order to avoid + writing continuously to the same files. + """ + + if item is None or slug(self.artist) != slug(item.albumartist): + if self.rest is not None: + path = os.path.join(directory, 'artists', + slug(self.artist) + u'.rst') + with open(path, 'wb') as output: + output.write(self.rest.encode('utf-8')) + self.rest = None + if item is None: + return + self.artist = item.albumartist.strip() + self.rest = u"%s\n%s\n\n.. 
contents::\n :local:\n\n" \ + % (self.artist, + u'=' * len(self.artist)) + if self.album != item.album: + tmpalbum = self.album = item.album.strip() + if self.album == '': + tmpalbum = u'Unknown album' + self.rest += u"%s\n%s\n\n" % (tmpalbum, u'-' * len(tmpalbum)) + title_str = u":index:`%s`" % item.title.strip() + block = u'| ' + item.lyrics.replace(u'\n', u'\n| ') + self.rest += u"%s\n%s\n\n%s\n\n" % (title_str, + u'~' * len(title_str), + block) + + def writerest_indexes(self, directory): + """Write conf.py and index.rst files necessary for Sphinx + + We write minimal configurations that are necessary for Sphinx + to operate. We do not overwrite existing files so that + customizations are respected.""" + try: + os.makedirs(os.path.join(directory, 'artists')) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + indexfile = os.path.join(directory, 'index.rst') + if not os.path.exists(indexfile): + with open(indexfile, 'w') as output: + output.write(REST_INDEX_TEMPLATE) + conffile = os.path.join(directory, 'conf.py') + if not os.path.exists(conffile): + with open(conffile, 'w') as output: + output.write(REST_CONF_TEMPLATE) + def imported(self, session, task): """Import hook for fetching lyrics automatically. """ @@ -685,7 +828,8 @@ class LyricsPlugin(plugins.BeetsPlugin): def fetch_item_lyrics(self, lib, item, write, force): """Fetch and store lyrics for a single item. If ``write``, then the - lyrics will also be written to the file itself.""" + lyrics will also be written to the file itself. + """ # Skip if the item already has lyrics. 
if not force and item.lyrics: self._log.info(u'lyrics already present: {0}', item) @@ -740,7 +884,7 @@ class LyricsPlugin(plugins.BeetsPlugin): if self.bing_auth_token: # Extract unique lines to limit API request size per song text_lines = set(text.split('\n')) - url = ('http://api.microsofttranslator.com/v2/Http.svc/' + url = ('https://api.microsofttranslator.com/v2/Http.svc/' 'Translate?text=%s&to=%s' % ('|'.join(text_lines), to_lang)) r = requests.get(url, headers={"Authorization ": self.bing_auth_token}) @@ -751,7 +895,7 @@ class LyricsPlugin(plugins.BeetsPlugin): self.bing_auth_token = None return self.append_translation(text, to_lang) return text - lines_translated = ET.fromstring(r.text.encode('utf8')).text + lines_translated = ET.fromstring(r.text.encode('utf-8')).text # Use a translation mapping dict to build resulting lyrics translations = dict(zip(text_lines, lines_translated.split('|'))) result = '' diff --git a/libs/beetsplug/mbcollection.py b/libs/beetsplug/mbcollection.py index b95ba6fe..d99c386c 100644 --- a/libs/beetsplug/mbcollection.py +++ b/libs/beetsplug/mbcollection.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2011, Jeffrey Aylesworth <jeffrey@aylesworth.ca> +# This file is part of beets. +# Copyright (c) 2011, Jeffrey Aylesworth <mail@jeffrey.red> # -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. 
+# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: # -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
from __future__ import division, absolute_import, print_function @@ -24,6 +24,7 @@ import musicbrainzngs import re SUBMISSION_CHUNK_SIZE = 200 +FETCH_CHUNK_SIZE = 100 UUID_REGEX = r'^[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}$' @@ -57,44 +58,93 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): super(MusicBrainzCollectionPlugin, self).__init__() config['musicbrainz']['pass'].redact = True musicbrainzngs.auth( - config['musicbrainz']['user'].get(unicode), - config['musicbrainz']['pass'].get(unicode), + config['musicbrainz']['user'].as_str(), + config['musicbrainz']['pass'].as_str(), ) - self.config.add({'auto': False}) + self.config.add({ + 'auto': False, + 'collection': u'', + 'remove': False, + }) if self.config['auto']: self.import_stages = [self.imported] + def _get_collection(self): + collections = mb_call(musicbrainzngs.get_collections) + if not collections['collection-list']: + raise ui.UserError(u'no collections exist for user') + + # Get all collection IDs, avoiding event collections + collection_ids = [x['id'] for x in collections['collection-list']] + if not collection_ids: + raise ui.UserError(u'No collection found.') + + # Check that the collection exists so we can present a nice error + collection = self.config['collection'].as_str() + if collection: + if collection not in collection_ids: + raise ui.UserError(u'invalid collection ID: {}' + .format(collection)) + return collection + + # No specified collection. 
Just return the first collection ID + return collection_ids[0] + + def _get_albums_in_collection(self, id): + def _fetch(offset): + res = mb_call( + musicbrainzngs.get_releases_in_collection, + id, + limit=FETCH_CHUNK_SIZE, + offset=offset + )['collection'] + return [x['id'] for x in res['release-list']], res['release-count'] + + offset = 0 + albums_in_collection, release_count = _fetch(offset) + for i in range(0, release_count, FETCH_CHUNK_SIZE): + albums_in_collection += _fetch(offset)[0] + offset += FETCH_CHUNK_SIZE + + return albums_in_collection + def commands(self): mbupdate = Subcommand('mbupdate', help=u'Update MusicBrainz collection') + mbupdate.parser.add_option('-r', '--remove', + action='store_true', + default=None, + dest='remove', + help='Remove albums not in beets library') mbupdate.func = self.update_collection return [mbupdate] + def remove_missing(self, collection_id, lib_albums): + lib_ids = set([x.mb_albumid for x in lib_albums]) + albums_in_collection = self._get_albums_in_collection(collection_id) + remove_me = list(set(albums_in_collection) - lib_ids) + for i in range(0, len(remove_me), FETCH_CHUNK_SIZE): + chunk = remove_me[i:i + FETCH_CHUNK_SIZE] + mb_call( + musicbrainzngs.remove_releases_from_collection, + collection_id, chunk + ) + def update_collection(self, lib, opts, args): - self.update_album_list(lib.albums()) + self.config.set_args(opts) + remove_missing = self.config['remove'].get(bool) + self.update_album_list(lib, lib.albums(), remove_missing) def imported(self, session, task): """Add each imported album to the collection. """ if task.is_album: - self.update_album_list([task.album]) + self.update_album_list(session.lib, [task.album]) - def update_album_list(self, album_list): - """Update the MusicBrainz colleciton from a list of Beets albums + def update_album_list(self, lib, album_list, remove_missing=False): + """Update the MusicBrainz collection from a list of Beets albums """ - # Get the available collections. 
- collections = mb_call(musicbrainzngs.get_collections) - if not collections['collection-list']: - raise ui.UserError(u'no collections exist for user') - - # Get the first release collection. MusicBrainz also has event - # collections, so we need to avoid adding to those. - for collection in collections['collection-list']: - if 'release-count' in collection: - collection_id = collection['id'] - break - else: - raise ui.UserError(u'No collection found.') + collection_id = self._get_collection() # Get a list of all the album IDs. album_ids = [] @@ -111,4 +161,6 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): u'Updating MusicBrainz collection {0}...', collection_id ) submit_albums(collection_id, album_ids) + if remove_missing: + self.remove_missing(collection_id, lib.albums()) self._log.info(u'...MusicBrainz collection updated.') diff --git a/libs/beetsplug/mbsubmit.py b/libs/beetsplug/mbsubmit.py index 91de6128..02bd5f69 100644 --- a/libs/beetsplug/mbsubmit.py +++ b/libs/beetsplug/mbsubmit.py @@ -36,7 +36,7 @@ class MBSubmitPlugin(BeetsPlugin): super(MBSubmitPlugin, self).__init__() self.config.add({ - 'format': '$track. $title - $artist ($length)', + 'format': u'$track. $title - $artist ($length)', 'threshold': 'medium', }) @@ -56,5 +56,5 @@ class MBSubmitPlugin(BeetsPlugin): return [PromptChoice(u'p', u'Print tracks', self.print_tracks)] def print_tracks(self, session, task): - for i in task.items: - print_data(None, i, self.config['format'].get()) + for i in sorted(task.items, key=lambda i: i.track): + print_data(None, i, self.config['format'].as_str()) diff --git a/libs/beetsplug/mbsync.py b/libs/beetsplug/mbsync.py index cf58c82d..1764a177 100644 --- a/libs/beetsplug/mbsync.py +++ b/libs/beetsplug/mbsync.py @@ -117,38 +117,48 @@ class MBSyncPlugin(BeetsPlugin): album_formatted) continue - # Map recording MBIDs to their information. Recordings can appear - # multiple times on a release, so each MBID maps to a list of - # TrackInfo objects. 
+ # Map release track and recording MBIDs to their information. + # Recordings can appear multiple times on a release, so each MBID + # maps to a list of TrackInfo objects. + releasetrack_index = dict() track_index = defaultdict(list) for track_info in album_info.tracks: + releasetrack_index[track_info.release_track_id] = track_info track_index[track_info.track_id].append(track_info) - # Construct a track mapping according to MBIDs. This should work - # for albums that have missing or extra tracks. If there are - # multiple copies of a recording, they are disambiguated using - # their disc and track number. + # Construct a track mapping according to MBIDs (release track MBIDs + # first, if available, and recording MBIDs otherwise). This should + # work for albums that have missing or extra tracks. mapping = {} for item in items: - candidates = track_index[item.mb_trackid] - if len(candidates) == 1: - mapping[item] = candidates[0] + if item.mb_releasetrackid and \ + item.mb_releasetrackid in releasetrack_index: + mapping[item] = releasetrack_index[item.mb_releasetrackid] else: - for c in candidates: - if (c.medium_index == item.track and - c.medium == item.disc): - mapping[item] = c - break + candidates = track_index[item.mb_trackid] + if len(candidates) == 1: + mapping[item] = candidates[0] + else: + # If there are multiple copies of a recording, they are + # disambiguated using their disc and track number. + for c in candidates: + if (c.medium_index == item.track and + c.medium == item.disc): + mapping[item] = c + break # Apply. self._log.debug(u'applying changes to {}', album_formatted) with lib.transaction(): autotag.apply_metadata(album_info, mapping) changed = False + # Find any changed item to apply MusicBrainz changes to album. 
+ any_changed_item = items[0] for item in items: item_changed = ui.show_model_changes(item) changed |= item_changed if item_changed: + any_changed_item = item apply_item_changes(lib, item, move, pretend, write) if not changed: @@ -158,7 +168,7 @@ class MBSyncPlugin(BeetsPlugin): if not pretend: # Update album structure to reflect an item in it. for key in library.Album.item_keys: - a[key] = items[0][key] + a[key] = any_changed_item[key] a.store() # Move album art (and any inconsistent items). diff --git a/libs/beetsplug/metasync/__init__.py b/libs/beetsplug/metasync/__init__.py index 3fc0be4c..02f0b0f9 100644 --- a/libs/beetsplug/metasync/__init__.py +++ b/libs/beetsplug/metasync/__init__.py @@ -24,6 +24,7 @@ from importlib import import_module from beets.util.confit import ConfigValueError from beets import ui from beets.plugins import BeetsPlugin +import six METASYNC_MODULE = 'beetsplug.metasync' @@ -35,9 +36,7 @@ SOURCES = { } -class MetaSource(object): - __metaclass__ = ABCMeta - +class MetaSource(six.with_metaclass(ABCMeta, object)): def __init__(self, config, log): self.item_types = {} self.config = config diff --git a/libs/beetsplug/metasync/amarok.py b/libs/beetsplug/metasync/amarok.py index aaa1ee91..0622fc17 100644 --- a/libs/beetsplug/metasync/amarok.py +++ b/libs/beetsplug/metasync/amarok.py @@ -21,7 +21,7 @@ from __future__ import division, absolute_import, print_function from os.path import basename from datetime import datetime from time import mktime -from xml.sax.saxutils import escape +from xml.sax.saxutils import quoteattr from beets.util import displayable_path from beets.dbcore import types @@ -51,7 +51,7 @@ class Amarok(MetaSource): queryXML = u'<query version="1.0"> \ <filters> \ - <and><include field="filename" value="%s" /></and> \ + <and><include field="filename" value=%s /></and> \ </filters> \ </query>' @@ -71,7 +71,9 @@ class Amarok(MetaSource): # for the patch relative to the mount point. But the full path is part # of the result set. 
So query for the filename and then try to match # the correct item from the results we get back - results = self.collection.Query(self.queryXML % escape(basename(path))) + results = self.collection.Query( + self.queryXML % quoteattr(basename(path)) + ) for result in results: if result['xesam:url'] != path: continue diff --git a/libs/beetsplug/metasync/itunes.py b/libs/beetsplug/metasync/itunes.py index a6274684..17ab1637 100644 --- a/libs/beetsplug/metasync/itunes.py +++ b/libs/beetsplug/metasync/itunes.py @@ -23,8 +23,8 @@ import os import shutil import tempfile import plistlib -import urllib -from urlparse import urlparse + +from six.moves.urllib.parse import urlparse, unquote from time import mktime from beets import util @@ -57,7 +57,7 @@ def _norm_itunes_path(path): # E.g., '\\G:\\Music\\bar' needs to be stripped to 'G:\\Music\\bar' return util.bytestring_path(os.path.normpath( - urllib.unquote(urlparse(path).path)).lstrip('\\')).lower() + unquote(urlparse(path).path)).lstrip('\\')).lower() class Itunes(MetaSource): diff --git a/libs/beetsplug/missing.py b/libs/beetsplug/missing.py index 8fff659f..8f0790f2 100644 --- a/libs/beetsplug/missing.py +++ b/libs/beetsplug/missing.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Pedro Silva. +# Copyright 2017, Quentin Young. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -17,11 +18,16 @@ """ from __future__ import division, absolute_import, print_function +import musicbrainzngs + +from musicbrainzngs.musicbrainz import MusicBrainzError +from collections import defaultdict from beets.autotag import hooks from beets.library import Item from beets.plugins import BeetsPlugin from beets.ui import decargs, print_, Subcommand from beets import config +from beets.dbcore import types def _missing_count(album): @@ -81,12 +87,18 @@ def _item(track_info, album_info, album_id): class MissingPlugin(BeetsPlugin): """List missing tracks """ + + album_types = { + 'missing': types.INTEGER, + } + def __init__(self): super(MissingPlugin, self).__init__() self.config.add({ 'count': False, 'total': False, + 'album': False, }) self.album_template_fields['missing'] = _missing_count @@ -100,40 +112,110 @@ class MissingPlugin(BeetsPlugin): self._command.parser.add_option( u'-t', u'--total', dest='total', action='store_true', help=u'count total of missing tracks') + self._command.parser.add_option( + u'-a', u'--album', dest='album', action='store_true', + help=u'show missing albums for artist instead of tracks') self._command.parser.add_format_option() def commands(self): def _miss(lib, opts, args): self.config.set_args(opts) - count = self.config['count'].get() - total = self.config['total'].get() - fmt = config['format_album' if count else 'format_item'].get() + albms = self.config['album'].get() - albums = lib.albums(decargs(args)) - if total: - print(sum([_missing_count(a) for a in albums])) - return - - # Default format string for count mode. 
- if count: - fmt += ': $missing' - - for album in albums: - if count: - if _missing_count(album): - print_(format(album, fmt)) - - else: - for item in self._missing(album): - print_(format(item, fmt)) + helper = self._missing_albums if albms else self._missing_tracks + helper(lib, decargs(args)) self._command.func = _miss return [self._command] + def _missing_tracks(self, lib, query): + """Print a listing of tracks missing from each album in the library + matching query. + """ + albums = lib.albums(query) + + count = self.config['count'].get() + total = self.config['total'].get() + fmt = config['format_album' if count else 'format_item'].get() + + if total: + print(sum([_missing_count(a) for a in albums])) + return + + # Default format string for count mode. + if count: + fmt += ': $missing' + + for album in albums: + if count: + if _missing_count(album): + print_(format(album, fmt)) + + else: + for item in self._missing(album): + print_(format(item, fmt)) + + def _missing_albums(self, lib, query): + """Print a listing of albums missing from each artist in the library + matching query. 
+ """ + total = self.config['total'].get() + + albums = lib.albums(query) + # build dict mapping artist to list of their albums in library + albums_by_artist = defaultdict(list) + for alb in albums: + artist = (alb['albumartist'], alb['mb_albumartistid']) + albums_by_artist[artist].append(alb) + + total_missing = 0 + + # build dict mapping artist to list of all albums + for artist, albums in albums_by_artist.items(): + if artist[1] is None or artist[1] == "": + albs_no_mbid = [u"'" + a['album'] + u"'" for a in albums] + self._log.info( + u"No musicbrainz ID for artist '{}' found in album(s) {}; " + "skipping", artist[0], u", ".join(albs_no_mbid) + ) + continue + + try: + resp = musicbrainzngs.browse_release_groups(artist=artist[1]) + release_groups = resp['release-group-list'] + except MusicBrainzError as err: + self._log.info( + u"Couldn't fetch info for artist '{}' ({}) - '{}'", + artist[0], artist[1], err + ) + continue + + missing = [] + present = [] + for rg in release_groups: + missing.append(rg) + for alb in albums: + if alb['mb_releasegroupid'] == rg['id']: + missing.remove(rg) + present.append(rg) + break + + total_missing += len(missing) + if total: + continue + + missing_titles = {rg['title'] for rg in missing} + + for release_title in missing_titles: + print_(u"{} - {}".format(artist[0], release_title)) + + if total: + print(total_missing) + def _missing(self, album): """Query MusicBrainz to determine items missing from `album`. """ - item_mbids = map(lambda x: x.mb_trackid, album.items()) + item_mbids = [x.mb_trackid for x in album.items()] if len([i for i in album.items()]) < album.albumtotal: # fetch missing items # TODO: Implement caching that without breaking other stuff diff --git a/libs/beetsplug/mpdstats.py b/libs/beetsplug/mpdstats.py index 2b642294..e5e82d48 100644 --- a/libs/beetsplug/mpdstats.py +++ b/libs/beetsplug/mpdstats.py @@ -40,36 +40,24 @@ mpd_config = config['mpd'] def is_url(path): """Try to determine if the path is an URL. 
""" + if isinstance(path, bytes): # if it's bytes, then it's a path + return False return path.split('://', 1)[0] in ['http', 'https'] -# Use the MPDClient internals to get unicode. -# see http://www.tarmack.eu/code/mpdunicode.py for the general idea -class MPDClient(mpd.MPDClient): - def _write_command(self, command, args=[]): - args = [unicode(arg).encode('utf-8') for arg in args] - super(MPDClient, self)._write_command(command, args) - - def _read_line(self): - line = super(MPDClient, self)._read_line() - if line is not None: - return line.decode('utf-8') - return None - - class MPDClientWrapper(object): def __init__(self, log): self._log = log self.music_directory = ( - mpd_config['music_directory'].get(unicode)) + mpd_config['music_directory'].as_str()) - self.client = MPDClient() + self.client = mpd.MPDClient(use_unicode=True) def connect(self): """Connect to the MPD. """ - host = mpd_config['host'].get(unicode) + host = mpd_config['host'].as_str() port = mpd_config['port'].get(int) if host[0] in ['/', '~']: @@ -81,7 +69,7 @@ class MPDClientWrapper(object): except socket.error as e: raise ui.UserError(u'could not connect to MPD: {0}'.format(e)) - password = mpd_config['password'].get(unicode) + password = mpd_config['password'].as_str() if password: try: self.client.password(password) @@ -268,32 +256,41 @@ class MPDStats(object): if not path: return - if is_url(path): - self._log.info(u'playing stream {0}', displayable_path(path)) - return - played, duration = map(int, status['time'].split(':', 1)) remaining = duration - played - if self.now_playing and self.now_playing['path'] != path: - skipped = self.handle_song_change(self.now_playing) - # mpd responds twice on a natural new song start - going_to_happen_twice = not skipped - else: - going_to_happen_twice = False + if self.now_playing: + if self.now_playing['path'] != path: + self.handle_song_change(self.now_playing) + else: + # In case we got mpd play event with same song playing + # multiple times, + # 
assume low diff means redundant second play event + # after natural song start. + diff = abs(time.time() - self.now_playing['started']) - if not going_to_happen_twice: - self._log.info(u'playing {0}', displayable_path(path)) + if diff <= self.time_threshold: + return - self.now_playing = { - 'started': time.time(), - 'remaining': remaining, - 'path': path, - 'beets_item': self.get_item(path), - } + if self.now_playing['path'] == path and played == 0: + self.handle_song_change(self.now_playing) - self.update_item(self.now_playing['beets_item'], - 'last_played', value=int(time.time())) + if is_url(path): + self._log.info(u'playing stream {0}', displayable_path(path)) + self.now_playing = None + return + + self._log.info(u'playing {0}', displayable_path(path)) + + self.now_playing = { + 'started': time.time(), + 'remaining': remaining, + 'path': path, + 'beets_item': self.get_item(path), + } + + self.update_item(self.now_playing['beets_item'], + 'last_played', value=int(time.time())) def run(self): self.mpd.connect() @@ -328,7 +325,7 @@ class MPDStatsPlugin(plugins.BeetsPlugin): 'music_directory': config['directory'].as_filename(), 'rating': True, 'rating_mix': 0.75, - 'host': u'localhost', + 'host': os.environ.get('MPD_HOST', u'localhost'), 'port': 6600, 'password': u'', }) @@ -353,11 +350,11 @@ class MPDStatsPlugin(plugins.BeetsPlugin): # Overrides for MPD settings. 
if opts.host: - mpd_config['host'] = opts.host.decode('utf8') + mpd_config['host'] = opts.host.decode('utf-8') if opts.port: mpd_config['host'] = int(opts.port) if opts.password: - mpd_config['password'] = opts.password.decode('utf8') + mpd_config['password'] = opts.password.decode('utf-8') try: MPDStats(lib, self._log).run() diff --git a/libs/beetsplug/mpdupdate.py b/libs/beetsplug/mpdupdate.py index f828ba5d..6ecc9213 100644 --- a/libs/beetsplug/mpdupdate.py +++ b/libs/beetsplug/mpdupdate.py @@ -27,6 +27,7 @@ from beets.plugins import BeetsPlugin import os import socket from beets import config +import six # No need to introduce a dependency on an MPD library for such a @@ -34,14 +35,14 @@ from beets import config # easier. class BufferedSocket(object): """Socket abstraction that allows reading by line.""" - def __init__(self, host, port, sep='\n'): + def __init__(self, host, port, sep=b'\n'): if host[0] in ['/', '~']: self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.sock.connect(os.path.expanduser(host)) else: self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.sock.connect((host, port)) - self.buf = '' + self.buf = b'' self.sep = sep def readline(self): @@ -50,11 +51,11 @@ class BufferedSocket(object): if not data: break self.buf += data - if '\n' in self.buf: + if self.sep in self.buf: res, self.buf = self.buf.split(self.sep, 1) return res + self.sep else: - return '' + return b'' def send(self, data): self.sock.send(data) @@ -67,7 +68,7 @@ class MPDUpdatePlugin(BeetsPlugin): def __init__(self): super(MPDUpdatePlugin, self).__init__() config['mpd'].add({ - 'host': u'localhost', + 'host': os.environ.get('MPD_HOST', u'localhost'), 'port': 6600, 'password': u'', }) @@ -86,9 +87,9 @@ class MPDUpdatePlugin(BeetsPlugin): def update(self, lib): self.update_mpd( - config['mpd']['host'].get(unicode), + config['mpd']['host'].as_str(), config['mpd']['port'].get(int), - config['mpd']['password'].get(unicode), + 
config['mpd']['password'].as_str(), ) def update_mpd(self, host='localhost', port=6600, password=None): @@ -101,28 +102,28 @@ class MPDUpdatePlugin(BeetsPlugin): s = BufferedSocket(host, port) except socket.error as e: self._log.warning(u'MPD connection failed: {0}', - unicode(e.strerror)) + six.text_type(e.strerror)) return resp = s.readline() - if 'OK MPD' not in resp: + if b'OK MPD' not in resp: self._log.warning(u'MPD connection failed: {0!r}', resp) return if password: - s.send('password "%s"\n' % password) + s.send(b'password "%s"\n' % password.encode('utf8')) resp = s.readline() - if 'OK' not in resp: + if b'OK' not in resp: self._log.warning(u'Authentication failed: {0!r}', resp) - s.send('close\n') + s.send(b'close\n') s.close() return - s.send('update\n') + s.send(b'update\n') resp = s.readline() - if 'updating_db' not in resp: + if b'updating_db' not in resp: self._log.warning(u'Update failed: {0!r}', resp) - s.send('close\n') + s.send(b'close\n') s.close() self._log.info(u'Database updated.') diff --git a/libs/beetsplug/permissions.py b/libs/beetsplug/permissions.py index 0de8978c..dd9e0984 100644 --- a/libs/beetsplug/permissions.py +++ b/libs/beetsplug/permissions.py @@ -13,24 +13,43 @@ import os from beets import config, util from beets.plugins import BeetsPlugin from beets.util import ancestry +import six def convert_perm(perm): - """If the perm is a int it will first convert it to a string and back - to an oct int. Else it just converts it to oct. + """Convert a string to an integer, interpreting the text as octal. + Or, if `perm` is an integer, reinterpret it as an octal number that + has been "misinterpreted" as decimal. """ - if isinstance(perm, int): - return int(bytes(perm), 8) - else: - return int(perm, 8) + if isinstance(perm, six.integer_types): + perm = six.text_type(perm) + return int(perm, 8) def check_permissions(path, permission): - """Checks the permissions of a path. + """Check whether the file's permissions equal the given vector. 
+ Return a boolean. """ return oct(os.stat(path).st_mode & 0o777) == oct(permission) +def assert_permissions(path, permission, log): + """Check whether the file's permissions are as expected, otherwise, + log a warning message. Return a boolean indicating the match, like + `check_permissions`. + """ + if not check_permissions(util.syspath(path), permission): + log.warning( + u'could not set permissions on {}', + util.displayable_path(path), + ) + log.debug( + u'set permissions to {}, but permissions are now {}', + permission, + os.stat(util.syspath(path)).st_mode & 0o777, + ) + + def dirs_in_library(library, item): """Creates a list of ancestor directories in the beets library path. """ @@ -45,58 +64,60 @@ class Permissions(BeetsPlugin): # Adding defaults. self.config.add({ - u'file': 644, - u'dir': 755 + u'file': '644', + u'dir': '755', }) - self.register_listener('item_imported', permissions) - self.register_listener('album_imported', permissions) + self.register_listener('item_imported', self.fix) + self.register_listener('album_imported', self.fix) + def fix(self, lib, item=None, album=None): + """Fix the permissions for an imported Item or Album. + """ + # Get the configured permissions. The user can specify this either a + # string (in YAML quotes) or, for convenience, as an integer so the + # quotes can be omitted. In the latter case, we need to reinterpret the + # integer as octal, not decimal. + file_perm = config['permissions']['file'].get() + dir_perm = config['permissions']['dir'].get() + file_perm = convert_perm(file_perm) + dir_perm = convert_perm(dir_perm) -def permissions(lib, item=None, album=None): - """Running the permission fixer. - """ - # Getting the config. - file_perm = config['permissions']['file'].get() - dir_perm = config['permissions']['dir'].get() + # Create chmod_queue. 
+ file_chmod_queue = [] + if item: + file_chmod_queue.append(item.path) + elif album: + for album_item in album.items(): + file_chmod_queue.append(album_item.path) - # Converts permissions to oct. - file_perm = convert_perm(file_perm) - dir_perm = convert_perm(dir_perm) + # A set of directories to change permissions for. + dir_chmod_queue = set() - # Create chmod_queue. - file_chmod_queue = [] - if item: - file_chmod_queue.append(item.path) - elif album: - for album_item in album.items(): - file_chmod_queue.append(album_item.path) + for path in file_chmod_queue: + # Changing permissions on the destination file. + self._log.debug( + u'setting file permissions on {}', + util.displayable_path(path), + ) + os.chmod(util.syspath(path), file_perm) - # A set of directories to change permissions for. - dir_chmod_queue = set() + # Checks if the destination path has the permissions configured. + assert_permissions(path, file_perm, self._log) - for path in file_chmod_queue: - # Changing permissions on the destination file. - os.chmod(util.bytestring_path(path), file_perm) + # Adding directories to the directory chmod queue. + dir_chmod_queue.update( + dirs_in_library(lib.directory, + path)) - # Checks if the destination path has the permissions configured. - if not check_permissions(util.bytestring_path(path), file_perm): - message = u'There was a problem setting permission on {}'.format( - path) - print(message) + # Change permissions for the directories. + for path in dir_chmod_queue: + # Chaning permissions on the destination directory. + self._log.debug( + u'setting directory permissions on {}', + util.displayable_path(path), + ) + os.chmod(util.syspath(path), dir_perm) - # Adding directories to the directory chmod queue. - dir_chmod_queue.update( - dirs_in_library(lib.directory, - path)) - - # Change permissions for the directories. - for path in dir_chmod_queue: - # Chaning permissions on the destination directory. 
- os.chmod(util.bytestring_path(path), dir_perm) - - # Checks if the destination path has the permissions configured. - if not check_permissions(util.bytestring_path(path), dir_perm): - message = u'There was a problem setting permission on {}'.format( - path) - print(message) + # Checks if the destination path has the permissions configured. + assert_permissions(path, dir_perm, self._log) diff --git a/libs/beetsplug/play.py b/libs/beetsplug/play.py index fa70f2bc..4d32a357 100644 --- a/libs/beetsplug/play.py +++ b/libs/beetsplug/play.py @@ -19,17 +19,41 @@ from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.ui import Subcommand +from beets.ui.commands import PromptChoice from beets import config from beets import ui from beets import util from os.path import relpath from tempfile import NamedTemporaryFile +import subprocess # Indicate where arguments should be inserted into the command string. # If this is missing, they're placed at the end. ARGS_MARKER = '$args' +def play(command_str, selection, paths, open_args, log, item_type='track', + keep_open=False): + """Play items in paths with command_str and optional arguments. If + keep_open, return to beets, otherwise exit once command runs. + """ + # Print number of tracks or albums to be played, log command to be run. + item_type += 's' if len(selection) > 1 else '' + ui.print_(u'Playing {0} {1}.'.format(len(selection), item_type)) + log.debug(u'executing command: {} {!r}', command_str, open_args) + + try: + if keep_open: + command = util.shlex_split(command_str) + command = command + open_args + subprocess.call(command) + else: + util.interactive_open(open_args, command_str) + except OSError as exc: + raise ui.UserError( + "Could not play the query: {0}".format(exc)) + + class PlayPlugin(BeetsPlugin): def __init__(self): @@ -40,11 +64,12 @@ class PlayPlugin(BeetsPlugin): 'use_folders': False, 'relative_to': None, 'raw': False, - # Backwards compatibility. 
See #1803 and line 74 - 'warning_threshold': -2, - 'warning_treshold': 100, + 'warning_threshold': 100, }) + self.register_listener('before_choose_candidate', + self.before_choose_candidate_listener) + def commands(self): play_command = Subcommand( 'play', @@ -56,41 +81,22 @@ class PlayPlugin(BeetsPlugin): action='store', help=u'add additional arguments to the command', ) - play_command.func = self.play_music + play_command.parser.add_option( + u'-y', u'--yes', + action="store_true", + help=u'skip the warning threshold', + ) + play_command.func = self._play_command return [play_command] - def play_music(self, lib, opts, args): - """Execute query, create temporary playlist and execute player - command passing that playlist, at request insert optional arguments. + def _play_command(self, lib, opts, args): + """The CLI command function for `beet play`. Create a list of paths + from query, determine if tracks or albums are to be played. """ - command_str = config['play']['command'].get() - if not command_str: - command_str = util.open_anything() use_folders = config['play']['use_folders'].get(bool) relative_to = config['play']['relative_to'].get() - raw = config['play']['raw'].get(bool) - warning_threshold = config['play']['warning_threshold'].get(int) - # We use -2 as a default value for warning_threshold to detect if it is - # set or not. We can't use a falsey value because it would have an - # actual meaning in the configuration of this plugin, and we do not use - # -1 because some people might use it as a value to obtain no warning, - # which wouldn't be that bad of a practice. - if warning_threshold == -2: - # if warning_threshold has not been set by user, look for - # warning_treshold, to preserve backwards compatibility. See #1803. - # warning_treshold has the correct default value of 100. 
- warning_threshold = config['play']['warning_treshold'].get(int) - if relative_to: relative_to = util.normpath(relative_to) - - # Add optional arguments to the player command. - if opts.args: - if ARGS_MARKER in command_str: - command_str = command_str.replace(ARGS_MARKER, opts.args) - else: - command_str = u"{} {}".format(command_str, opts.args) - # Perform search by album and add folders rather than tracks to # playlist. if opts.album: @@ -110,46 +116,95 @@ class PlayPlugin(BeetsPlugin): else: selection = lib.items(ui.decargs(args)) paths = [item.path for item in selection] - if relative_to: - paths = [relpath(path, relative_to) for path in paths] item_type = 'track' - item_type += 's' if len(selection) > 1 else '' + if relative_to: + paths = [relpath(path, relative_to) for path in paths] if not selection: ui.print_(ui.colorize('text_warning', u'No {0} to play.'.format(item_type))) return + open_args = self._playlist_or_paths(paths) + command_str = self._command_str(opts.args) + + # Check if the selection exceeds configured threshold. If True, + # cancel, otherwise proceed with play command. + if opts.yes or not self._exceeds_threshold( + selection, command_str, open_args, item_type): + play(command_str, selection, paths, open_args, self._log, + item_type) + + def _command_str(self, args=None): + """Create a command string from the config command and optional args. + """ + command_str = config['play']['command'].get() + if not command_str: + return util.open_anything() + # Add optional arguments to the player command. + if args: + if ARGS_MARKER in command_str: + return command_str.replace(ARGS_MARKER, args) + else: + return u"{} {}".format(command_str, args) + else: + # Don't include the marker in the command. + return command_str.replace(" " + ARGS_MARKER, "") + + def _playlist_or_paths(self, paths): + """Return either the raw paths of items or a playlist of the items. 
+ """ + if config['play']['raw']: + return paths + else: + return [self._create_tmp_playlist(paths)] + + def _exceeds_threshold(self, selection, command_str, open_args, + item_type='track'): + """Prompt user whether to abort if playlist exceeds threshold. If + True, cancel playback. If False, execute play command. + """ + warning_threshold = config['play']['warning_threshold'].get(int) + # Warn user before playing any huge playlists. if warning_threshold and len(selection) > warning_threshold: + if len(selection) > 1: + item_type += 's' + ui.print_(ui.colorize( 'text_warning', u'You are about to queue {0} {1}.'.format( len(selection), item_type))) - if ui.input_options(('Continue', 'Abort')) == 'a': - return + if ui.input_options((u'Continue', u'Abort')) == 'a': + return True - ui.print_(u'Playing {0} {1}.'.format(len(selection), item_type)) - if raw: - open_args = paths - else: - open_args = [self._create_tmp_playlist(paths)] - - self._log.debug(u'executing command: {} {}', command_str, - b' '.join(open_args)) - try: - util.interactive_open(open_args, command_str) - except OSError as exc: - raise ui.UserError( - "Could not play the query: {0}".format(exc)) + return False def _create_tmp_playlist(self, paths_list): """Create a temporary .m3u file. Return the filename. """ - m3u = NamedTemporaryFile('w', suffix='.m3u', delete=False) + m3u = NamedTemporaryFile('wb', suffix='.m3u', delete=False) for item in paths_list: m3u.write(item + b'\n') m3u.close() return m3u.name + + def before_choose_candidate_listener(self, session, task): + """Append a "Play" choice to the interactive importer prompt. + """ + return [PromptChoice('y', 'plaY', self.importer_play)] + + def importer_play(self, session, task): + """Get items from current import task and send to play function. 
+ """ + selection = task.items + paths = [item.path for item in selection] + + open_args = self._playlist_or_paths(paths) + command_str = self._command_str() + + if not self._exceeds_threshold(selection, command_str, open_args): + play(command_str, selection, paths, open_args, self._log, + keep_open=True) diff --git a/libs/beetsplug/plexupdate.py b/libs/beetsplug/plexupdate.py index ef50fde7..17fd8208 100644 --- a/libs/beetsplug/plexupdate.py +++ b/libs/beetsplug/plexupdate.py @@ -12,9 +12,8 @@ Put something like the following in your config.yaml to configure: from __future__ import division, absolute_import, print_function import requests -from urlparse import urljoin -from urllib import urlencode import xml.etree.ElementTree as ET +from six.moves.urllib.parse import urljoin, urlencode from beets import config from beets.plugins import BeetsPlugin @@ -68,6 +67,7 @@ class PlexUpdate(BeetsPlugin): u'token': u'', u'library_name': u'Music'}) + config['plex']['token'].redact = True self.register_listener('database_change', self.listen_for_db_change) def listen_for_db_change(self, lib, model): diff --git a/libs/beetsplug/random.py b/libs/beetsplug/random.py index e1c6fea4..65caaf90 100644 --- a/libs/beetsplug/random.py +++ b/libs/beetsplug/random.py @@ -24,56 +24,124 @@ from operator import attrgetter from itertools import groupby -def random_item(lib, opts, args): - query = decargs(args) +def _length(obj, album): + """Get the duration of an item or album. + """ + if album: + return sum(i.length for i in obj.items()) + else: + return obj.length + +def _equal_chance_permutation(objs, field='albumartist'): + """Generate (lazily) a permutation of the objects where every group + with equal values for `field` have an equal chance of appearing in + any given position. + """ + # Group the objects by artist so we can sample from them. 
+ key = attrgetter(field) + objs.sort(key=key) + objs_by_artists = {} + for artist, v in groupby(objs, key): + objs_by_artists[artist] = list(v) + + # While we still have artists with music to choose from, pick one + # randomly and pick a track from that artist. + while objs_by_artists: + # Choose an artist and an object for that artist, removing + # this choice from the pool. + artist = random.choice(list(objs_by_artists.keys())) + objs_from_artist = objs_by_artists[artist] + i = random.randint(0, len(objs_from_artist) - 1) + yield objs_from_artist.pop(i) + + # Remove the artist if we've used up all of its objects. + if not objs_from_artist: + del objs_by_artists[artist] + + +def _take(iter, num): + """Return a list containing the first `num` values in `iter` (or + fewer, if the iterable ends early). + """ + out = [] + for val in iter: + out.append(val) + num -= 1 + if num <= 0: + break + return out + + +def _take_time(iter, secs, album): + """Return a list containing the first values in `iter`, which should + be Item or Album objects, that add up to the given amount of time in + seconds. + """ + out = [] + total_time = 0.0 + for obj in iter: + length = _length(obj, album) + if total_time + length <= secs: + out.append(obj) + total_time += length + return out + + +def random_objs(objs, album, number=1, time=None, equal_chance=False): + """Get a random subset of the provided `objs`. + + If `number` is provided, produce that many matches. Otherwise, if + `time` is provided, instead select a list whose total time is close + to that number of minutes. If `equal_chance` is true, give each + artist an equal chance of being included so that artists with more + songs are not represented disproportionately. + """ + # Permute the objects either in a straightforward way or an + # artist-balanced way. + if equal_chance: + perm = _equal_chance_permutation(objs) + else: + perm = objs + random.shuffle(perm) # N.B. This shuffles the original list. 
+ + # Select objects by time our count. + if time: + return _take_time(perm, time * 60, album) + else: + return _take(perm, number) + + +def random_func(lib, opts, args): + """Select some random items or albums and print the results. + """ + # Fetch all the objects matching the query into a list. + query = decargs(args) if opts.album: objs = list(lib.albums(query)) else: objs = list(lib.items(query)) - if opts.equal_chance: - # Group the objects by artist so we can sample from them. - key = attrgetter('albumartist') - objs.sort(key=key) - objs_by_artists = {} - for artist, v in groupby(objs, key): - objs_by_artists[artist] = list(v) + # Print a random subset. + objs = random_objs(objs, opts.album, opts.number, opts.time, + opts.equal_chance) + for obj in objs: + print_(format(obj)) - objs = [] - for _ in range(opts.number): - # Terminate early if we're out of objects to select. - if not objs_by_artists: - break - - # Choose an artist and an object for that artist, removing - # this choice from the pool. - artist = random.choice(objs_by_artists.keys()) - objs_from_artist = objs_by_artists[artist] - i = random.randint(0, len(objs_from_artist) - 1) - objs.append(objs_from_artist.pop(i)) - - # Remove the artist if we've used up all of its objects. 
- if not objs_from_artist: - del objs_by_artists[artist] - - else: - number = min(len(objs), opts.number) - objs = random.sample(objs, number) - - for item in objs: - print_(format(item)) random_cmd = Subcommand('random', - help=u'chose a random track or album') + help=u'choose a random track or album') random_cmd.parser.add_option( u'-n', u'--number', action='store', type="int", help=u'number of objects to choose', default=1) random_cmd.parser.add_option( u'-e', u'--equal-chance', action='store_true', help=u'each artist has the same chance') +random_cmd.parser.add_option( + u'-t', u'--time', action='store', type="float", + help=u'total length in minutes of objects to choose') random_cmd.parser.add_all_common_options() -random_cmd.func = random_item +random_cmd.func = random_func class Random(BeetsPlugin): diff --git a/libs/beetsplug/replaygain.py b/libs/beetsplug/replaygain.py index 7bb2aa39..a7eb81b5 100644 --- a/libs/beetsplug/replaygain.py +++ b/libs/beetsplug/replaygain.py @@ -18,15 +18,15 @@ from __future__ import division, absolute_import, print_function import subprocess import os import collections -import itertools import sys import warnings -import re +import xml.parsers.expat +from six.moves import zip -from beets import logging from beets import ui from beets.plugins import BeetsPlugin -from beets.util import syspath, command_output, displayable_path +from beets.util import (syspath, command_output, bytestring_path, + displayable_path, py3_path) # Utilities. @@ -60,7 +60,7 @@ def call(args): except UnicodeEncodeError: # Due to a bug in Python 2's subprocess on Windows, Unicode # filenames can fail to encode on that platform. 
See: - # http://code.google.com/p/beets/issues/detail?id=499 + # https://github.com/google-code-export/beets/issues/499 raise ReplayGainError(u"argument encoding failed") @@ -102,9 +102,9 @@ class Bs1770gainBackend(Backend): 'method': 'replaygain', }) self.chunk_at = config['chunk_at'].as_number() - self.method = b'--' + bytes(config['method'].get(unicode)) + self.method = '--' + config['method'].as_str() - cmd = b'bs1770gain' + cmd = 'bs1770gain' try: call([cmd, self.method]) self.command = cmd @@ -194,13 +194,14 @@ class Bs1770gainBackend(Backend): """ # Construct shell command. cmd = [self.command] - cmd = cmd + [self.method] - cmd = cmd + [b'-it'] + cmd += [self.method] + cmd += ['--xml', '-p'] # Workaround for Windows: the underlying tool fails on paths # with the \\?\ prefix, so we don't use it here. This # prevents the backend from working with long paths. args = cmd + [syspath(i.path, prefix=False) for i in items] + path_list = [i.path for i in items] # Invoke the command. self._log.debug( @@ -209,40 +210,65 @@ class Bs1770gainBackend(Backend): output = call(args) self._log.debug(u'analysis finished: {0}', output) - results = self.parse_tool_output(output, - len(items) + is_album) + results = self.parse_tool_output(output, path_list, is_album) self._log.debug(u'{0} items, {1} results', len(items), len(results)) return results - def parse_tool_output(self, text, num_lines): + def parse_tool_output(self, text, path_list, is_album): """Given the output from bs1770gain, parse the text and return a list of dictionaries containing information about each analyzed file. 
""" - out = [] - data = text.decode('utf8', errors='ignore') - regex = re.compile( - ur'(\s{2,2}\[\d+\/\d+\].*?|\[ALBUM\].*?)' - '(?=\s{2,2}\[\d+\/\d+\]|\s{2,2}\[ALBUM\]' - ':|done\.\s)', re.DOTALL | re.UNICODE) - results = re.findall(regex, data) - for parts in results[0:num_lines]: - part = parts.split(b'\n') - if len(part) == 0: - self._log.debug(u'bad tool output: {0!r}', text) - raise ReplayGainError(u'bs1770gain failed') + per_file_gain = {} + album_gain = {} # mutable variable so it can be set from handlers + parser = xml.parsers.expat.ParserCreate(encoding='utf-8') + state = {'file': None, 'gain': None, 'peak': None} - try: - song = { - 'file': part[0], - 'gain': float((part[1].split('/'))[1].split('LU')[0]), - 'peak': float(part[2].split('/')[1]), - } - except IndexError: - self._log.info(u'bs1770gain reports (faulty file?): {}', parts) - continue + def start_element_handler(name, attrs): + if name == u'track': + state['file'] = bytestring_path(attrs[u'file']) + if state['file'] in per_file_gain: + raise ReplayGainError( + u'duplicate filename in bs1770gain output') + elif name == u'integrated': + state['gain'] = float(attrs[u'lu']) + elif name == u'sample-peak': + state['peak'] = float(attrs[u'factor']) - out.append(Gain(song['gain'], song['peak'])) + def end_element_handler(name): + if name == u'track': + if state['gain'] is None or state['peak'] is None: + raise ReplayGainError(u'could not parse gain or peak from ' + 'the output of bs1770gain') + per_file_gain[state['file']] = Gain(state['gain'], + state['peak']) + state['gain'] = state['peak'] = None + elif name == u'summary': + if state['gain'] is None or state['peak'] is None: + raise ReplayGainError(u'could not parse gain or peak from ' + 'the output of bs1770gain') + album_gain["album"] = Gain(state['gain'], state['peak']) + state['gain'] = state['peak'] = None + parser.StartElementHandler = start_element_handler + parser.EndElementHandler = end_element_handler + parser.Parse(text, True) + + if 
len(per_file_gain) != len(path_list): + raise ReplayGainError( + u'the number of results returned by bs1770gain does not match ' + 'the number of files passed to it') + + # bs1770gain does not return the analysis results in the order that + # files are passed on the command line, because it is sorting the files + # internally. We must recover the order from the filenames themselves. + try: + out = [per_file_gain[os.path.basename(p)] for p in path_list] + except KeyError: + raise ReplayGainError( + u'unrecognized filename in bs1770gain output ' + '(bs1770gain can only deal with utf-8 file names)') + if is_album: + out.append(album_gain["album"]) return out @@ -256,7 +282,7 @@ class CommandBackend(Backend): 'noclip': True, }) - self.command = config["command"].get(unicode) + self.command = config["command"].as_str() if self.command: # Explicit executable path. @@ -267,9 +293,9 @@ class CommandBackend(Backend): ) else: # Check whether the program is in $PATH. - for cmd in (b'mp3gain', b'aacgain'): + for cmd in ('mp3gain', 'aacgain'): try: - call([cmd, b'-v']) + call([cmd, '-v']) self.command = cmd except OSError: pass @@ -286,7 +312,7 @@ class CommandBackend(Backend): """Computes the track gain of the given tracks, returns a list of TrackGain objects. """ - supported_items = filter(self.format_supported, items) + supported_items = list(filter(self.format_supported, items)) output = self.compute_gain(supported_items, False) return output @@ -297,7 +323,7 @@ class CommandBackend(Backend): # TODO: What should be done when not all tracks in the album are # supported? 
- supported_items = filter(self.format_supported, album.items()) + supported_items = list(filter(self.format_supported, album.items())) if len(supported_items) != len(album.items()): self._log.debug(u'tracks are of unsupported format') return AlbumGain(None, []) @@ -334,14 +360,14 @@ class CommandBackend(Backend): # tag-writing; this turns the mp3gain/aacgain tool into a gain # calculator rather than a tag manipulator because we take care # of changing tags ourselves. - cmd = [self.command, b'-o', b'-s', b's'] + cmd = [self.command, '-o', '-s', 's'] if self.noclip: # Adjust to avoid clipping. - cmd = cmd + [b'-k'] + cmd = cmd + ['-k'] else: # Disable clipping warning. - cmd = cmd + [b'-c'] - cmd = cmd + [b'-d', bytes(self.gain_offset)] + cmd = cmd + ['-c'] + cmd = cmd + ['-d', str(self.gain_offset)] cmd = cmd + [syspath(i.path) for i in items] self._log.debug(u'analyzing {0} files', len(items)) @@ -574,7 +600,7 @@ class GStreamerBackend(Backend): self._file = self._files.pop(0) self._pipe.set_state(self.Gst.State.NULL) - self._src.set_property("location", syspath(self._file.path)) + self._src.set_property("location", py3_path(syspath(self._file.path))) self._pipe.set_state(self.Gst.State.PLAYING) return True @@ -587,16 +613,6 @@ class GStreamerBackend(Backend): self._file = self._files.pop(0) - # Disconnect the decodebin element from the pipeline, set its - # state to READY to to clear it. 
- self._decbin.unlink(self._conv) - self._decbin.set_state(self.Gst.State.READY) - - # Set a new file on the filesrc element, can only be done in the - # READY state - self._src.set_state(self.Gst.State.READY) - self._src.set_property("location", syspath(self._file.path)) - # Ensure the filesrc element received the paused state of the # pipeline in a blocking manner self._src.sync_state_with_parent() @@ -607,6 +623,19 @@ class GStreamerBackend(Backend): self._decbin.sync_state_with_parent() self._decbin.get_state(self.Gst.CLOCK_TIME_NONE) + # Disconnect the decodebin element from the pipeline, set its + # state to READY to to clear it. + self._decbin.unlink(self._conv) + self._decbin.set_state(self.Gst.State.READY) + + # Set a new file on the filesrc element, can only be done in the + # READY state + self._src.set_state(self.Gst.State.READY) + self._src.set_property("location", py3_path(syspath(self._file.path))) + + self._decbin.link(self._conv) + self._pipe.set_state(self.Gst.State.READY) + return True def _set_next_file(self): @@ -794,7 +823,7 @@ class ReplayGainPlugin(BeetsPlugin): "command": CommandBackend, "gstreamer": GStreamerBackend, "audiotools": AudioToolsBackend, - "bs1770gain": Bs1770gainBackend + "bs1770gain": Bs1770gainBackend, } def __init__(self): @@ -806,10 +835,11 @@ class ReplayGainPlugin(BeetsPlugin): 'auto': True, 'backend': u'command', 'targetlevel': 89, + 'r128': ['Opus'], }) self.overwrite = self.config['overwrite'].get(bool) - backend_name = self.config['backend'].get(unicode) + backend_name = self.config['backend'].as_str() if backend_name not in self.backends: raise ui.UserError( u"Selected ReplayGain backend {0} is not supported. " @@ -823,6 +853,9 @@ class ReplayGainPlugin(BeetsPlugin): if self.config['auto']: self.import_stages = [self.imported] + # Formats to use R128. 
+ self.r128_whitelist = self.config['r128'].as_str_seq() + try: self.backend_instance = self.backends[backend_name]( self.config, self._log @@ -831,9 +864,19 @@ class ReplayGainPlugin(BeetsPlugin): raise ui.UserError( u'replaygain initialization failed: {0}'.format(e)) + self.r128_backend_instance = '' + + def should_use_r128(self, item): + """Checks the plugin setting to decide whether the calculation + should be done using the EBU R128 standard and use R128_ tags instead. + """ + return item.format in self.r128_whitelist + def track_requires_gain(self, item): return self.overwrite or \ - (not item.rg_track_gain or not item.rg_track_peak) + (self.should_use_r128(item) and not item.r128_track_gain) or \ + (not self.should_use_r128(item) and + (not item.rg_track_gain or not item.rg_track_peak)) def album_requires_gain(self, album): # Skip calculating gain only when *all* files don't need @@ -841,8 +884,12 @@ class ReplayGainPlugin(BeetsPlugin): # needs recalculation, we still get an accurate album gain # value. 
return self.overwrite or \ - any([not item.rg_album_gain or not item.rg_album_peak - for item in album.items()]) + any([self.should_use_r128(item) and + (not item.r128_track_gain or not item.r128_album_gain) + for item in album.items()]) or \ + any([not self.should_use_r128(item) and + (not item.rg_album_gain or not item.rg_album_peak) + for item in album.items()]) def store_track_gain(self, item, track_gain): item.rg_track_gain = track_gain.gain @@ -852,6 +899,12 @@ class ReplayGainPlugin(BeetsPlugin): self._log.debug(u'applied track gain {0}, peak {1}', item.rg_track_gain, item.rg_track_peak) + def store_track_r128_gain(self, item, track_gain): + item.r128_track_gain = int(round(track_gain.gain * pow(2, 8))) + item.store() + + self._log.debug(u'applied track gain {0}', item.r128_track_gain) + def store_album_gain(self, album, album_gain): album.rg_album_gain = album_gain.gain album.rg_album_peak = album_gain.peak @@ -860,7 +913,13 @@ class ReplayGainPlugin(BeetsPlugin): self._log.debug(u'applied album gain {0}, peak {1}', album.rg_album_gain, album.rg_album_peak) - def handle_album(self, album, write): + def store_album_r128_gain(self, album, album_gain): + album.r128_album_gain = int(round(album_gain.gain * pow(2, 8))) + album.store() + + self._log.debug(u'applied album gain {0}', album.r128_album_gain) + + def handle_album(self, album, write, force=False): """Compute album and track replay gain store it in all of the album's items. @@ -868,24 +927,41 @@ class ReplayGainPlugin(BeetsPlugin): item. If replay gain information is already present in all items, nothing is done. 
""" - if not self.album_requires_gain(album): + if not force and not self.album_requires_gain(album): self._log.info(u'Skipping album {0}', album) return self._log.info(u'analyzing {0}', album) + if (any([self.should_use_r128(item) for item in album.items()]) and not + all(([self.should_use_r128(item) for item in album.items()]))): + raise ReplayGainError( + u"Mix of ReplayGain and EBU R128 detected" + u" for some tracks in album {0}".format(album) + ) + + if any([self.should_use_r128(item) for item in album.items()]): + if self.r128_backend_instance == '': + self.init_r128_backend() + backend_instance = self.r128_backend_instance + store_track_gain = self.store_track_r128_gain + store_album_gain = self.store_album_r128_gain + else: + backend_instance = self.backend_instance + store_track_gain = self.store_track_gain + store_album_gain = self.store_album_gain + try: - album_gain = self.backend_instance.compute_album_gain(album) + album_gain = backend_instance.compute_album_gain(album) if len(album_gain.track_gains) != len(album.items()): raise ReplayGainError( u"ReplayGain backend failed " u"for some tracks in album {0}".format(album) ) - self.store_album_gain(album, album_gain.album_gain) - for item, track_gain in itertools.izip(album.items(), - album_gain.track_gains): - self.store_track_gain(item, track_gain) + store_album_gain(album, album_gain.album_gain) + for item, track_gain in zip(album.items(), album_gain.track_gains): + store_track_gain(item, track_gain) if write: item.try_write() except ReplayGainError as e: @@ -894,27 +970,36 @@ class ReplayGainPlugin(BeetsPlugin): raise ui.UserError( u"Fatal replay gain error: {0}".format(e)) - def handle_track(self, item, write): + def handle_track(self, item, write, force=False): """Compute track replay gain and store it in the item. If ``write`` is truthy then ``item.write()`` is called to write the data to disk. If replay gain information is already present in the item, nothing is done. 
""" - if not self.track_requires_gain(item): + if not force and not self.track_requires_gain(item): self._log.info(u'Skipping track {0}', item) return self._log.info(u'analyzing {0}', item) + if self.should_use_r128(item): + if self.r128_backend_instance == '': + self.init_r128_backend() + backend_instance = self.r128_backend_instance + store_track_gain = self.store_track_r128_gain + else: + backend_instance = self.backend_instance + store_track_gain = self.store_track_gain + try: - track_gains = self.backend_instance.compute_track_gain([item]) + track_gains = backend_instance.compute_track_gain([item]) if len(track_gains) != 1: raise ReplayGainError( u"ReplayGain backend failed for track {0}".format(item) ) - self.store_track_gain(item, track_gains[0]) + store_track_gain(item, track_gains[0]) if write: item.try_write() except ReplayGainError as e: @@ -923,6 +1008,19 @@ class ReplayGainPlugin(BeetsPlugin): raise ui.UserError( u"Fatal replay gain error: {0}".format(e)) + def init_r128_backend(self): + backend_name = 'bs1770gain' + + try: + self.r128_backend_instance = self.backends[backend_name]( + self.config, self._log + ) + except (ReplayGainError, FatalReplayGainError) as e: + raise ui.UserError( + u'replaygain initialization failed: {0}'.format(e)) + + self.r128_backend_instance.method = '--ebu' + def imported(self, session, task): """Add replay gain info to items or albums of ``task``. """ @@ -935,19 +1033,28 @@ class ReplayGainPlugin(BeetsPlugin): """Return the "replaygain" ui subcommand. 
""" def func(lib, opts, args): - self._log.setLevel(logging.INFO) - - write = ui.should_write() + write = ui.should_write(opts.write) + force = opts.force if opts.album: for album in lib.albums(ui.decargs(args)): - self.handle_album(album, write) + self.handle_album(album, write, force) else: for item in lib.items(ui.decargs(args)): - self.handle_track(item, write) + self.handle_track(item, write, force) cmd = ui.Subcommand('replaygain', help=u'analyze for ReplayGain') cmd.parser.add_album_option() + cmd.parser.add_option( + "-f", "--force", dest="force", action="store_true", default=False, + help=u"analyze all files, including those that " + "already have ReplayGain metadata") + cmd.parser.add_option( + "-w", "--write", default=None, action="store_true", + help=u"write new metadata to files' tags") + cmd.parser.add_option( + "-W", "--nowrite", dest="write", action="store_false", + help=u"don't write metadata (opposite of -w)") cmd.func = func return [cmd] diff --git a/libs/beetsplug/rewrite.py b/libs/beetsplug/rewrite.py index b0104a11..eadb1425 100644 --- a/libs/beetsplug/rewrite.py +++ b/libs/beetsplug/rewrite.py @@ -51,7 +51,7 @@ class RewritePlugin(BeetsPlugin): # Gather all the rewrite rules for each field. rules = defaultdict(list) for key, view in self.config.items(): - value = view.get(unicode) + value = view.as_str() try: fieldname, pattern = key.split(None, 1) except ValueError: @@ -68,7 +68,7 @@ class RewritePlugin(BeetsPlugin): rules['albumartist'].append((pattern, value)) # Replace each template field with the new rewriter function. 
- for fieldname, fieldrules in rules.iteritems(): + for fieldname, fieldrules in rules.items(): getter = rewriter(fieldname, fieldrules) self.template_fields[fieldname] = getter if fieldname in library.Album._fields: diff --git a/libs/beetsplug/scrub.py b/libs/beetsplug/scrub.py index ed4040d5..be6e7fd1 100644 --- a/libs/beetsplug/scrub.py +++ b/libs/beetsplug/scrub.py @@ -24,6 +24,7 @@ from beets import ui from beets import util from beets import config from beets import mediafile +import mutagen _MUTAGEN_FORMATS = { 'asf': 'ASF', @@ -106,7 +107,7 @@ class ScrubPlugin(BeetsPlugin): for tag in f.keys(): del f[tag] f.save() - except IOError as exc: + except (IOError, mutagen.MutagenError) as exc: self._log.error(u'could not scrub {0}: {1}', util.displayable_path(path), exc) @@ -119,10 +120,11 @@ class ScrubPlugin(BeetsPlugin): try: mf = mediafile.MediaFile(util.syspath(item.path), config['id3v23'].get(bool)) - except IOError as exc: + except mediafile.UnreadableFileError as exc: self._log.error(u'could not open file to scrub: {0}', exc) - art = mf.art + return + images = mf.images # Remove all tags. 
self._scrub(item.path) @@ -131,12 +133,15 @@ class ScrubPlugin(BeetsPlugin): if restore: self._log.debug(u'writing new tags after scrub') item.try_write() - if art: + if images: self._log.debug(u'restoring art') - mf = mediafile.MediaFile(util.syspath(item.path), - config['id3v23'].get(bool)) - mf.art = art - mf.save() + try: + mf = mediafile.MediaFile(util.syspath(item.path), + config['id3v23'].get(bool)) + mf.images = images + mf.save() + except mediafile.UnreadableFileError as exc: + self._log.error(u'could not write tags: {0}', exc) def import_task_files(self, session, task): """Automatically scrub imported files.""" diff --git a/libs/beetsplug/smartplaylist.py b/libs/beetsplug/smartplaylist.py index f6d7f715..009512c5 100644 --- a/libs/beetsplug/smartplaylist.py +++ b/libs/beetsplug/smartplaylist.py @@ -20,11 +20,13 @@ from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui -from beets.util import mkdirall, normpath, syspath +from beets.util import (mkdirall, normpath, sanitize_path, syspath, + bytestring_path) from beets.library import Item, Album, parse_query_string from beets.dbcore import OrQuery from beets.dbcore.query import MultipleSort, ParsingError import os +import six class SmartPlaylistPlugin(BeetsPlugin): @@ -97,7 +99,7 @@ class SmartPlaylistPlugin(BeetsPlugin): for playlist in self.config['playlists'].get(list): if 'name' not in playlist: - self._log.warn(u"playlist configuration is missing name") + self._log.warning(u"playlist configuration is missing name") continue playlist_data = (playlist['name'],) @@ -106,7 +108,7 @@ class SmartPlaylistPlugin(BeetsPlugin): qs = playlist.get(key) if qs is None: query_and_sort = None, None - elif isinstance(qs, basestring): + elif isinstance(qs, six.string_types): query_and_sort = parse_query_string(qs, Model) elif len(qs) == 1: query_and_sort = parse_query_string(qs[0], Model) @@ -133,8 +135,8 @@ class SmartPlaylistPlugin(BeetsPlugin): 
playlist_data += (query_and_sort,) except ParsingError as exc: - self._log.warn(u"invalid query in playlist {}: {}", - playlist['name'], exc) + self._log.warning(u"invalid query in playlist {}: {}", + playlist['name'], exc) continue self._unmatched_playlists.add(playlist_data) @@ -165,10 +167,14 @@ class SmartPlaylistPlugin(BeetsPlugin): len(self._matched_playlists)) playlist_dir = self.config['playlist_dir'].as_filename() + playlist_dir = bytestring_path(playlist_dir) relative_to = self.config['relative_to'].get() if relative_to: relative_to = normpath(relative_to) + # Maps playlist filenames to lists of track filenames. + m3us = {} + for playlist in self._matched_playlists: name, (query, q_sort), (album_query, a_q_sort) = playlist self._log.debug(u"Creating playlist {0}", name) @@ -180,11 +186,11 @@ class SmartPlaylistPlugin(BeetsPlugin): for album in lib.albums(album_query, a_q_sort): items.extend(album.items()) - m3us = {} # As we allow tags in the m3u names, we'll need to iterate through # the items and generate the correct m3u file names. for item in items: m3u_name = item.evaluate_template(name, True) + m3u_name = sanitize_path(m3u_name, lib.replacements) if m3u_name not in m3us: m3us[m3u_name] = [] item_path = item.path @@ -192,11 +198,14 @@ class SmartPlaylistPlugin(BeetsPlugin): item_path = os.path.relpath(item.path, relative_to) if item_path not in m3us[m3u_name]: m3us[m3u_name].append(item_path) - # Now iterate through the m3us that we need to generate - for m3u in m3us: - m3u_path = normpath(os.path.join(playlist_dir, m3u)) - mkdirall(m3u_path) - with open(syspath(m3u_path), 'w') as f: - for path in m3us[m3u]: - f.write(path + b'\n') + + # Write all of the accumulated track lists to files. 
+ for m3u in m3us: + m3u_path = normpath(os.path.join(playlist_dir, + bytestring_path(m3u))) + mkdirall(m3u_path) + with open(syspath(m3u_path), 'wb') as f: + for path in m3us[m3u]: + f.write(path + b'\n') + self._log.info(u"{0} playlists updated", len(self._matched_playlists)) diff --git a/libs/beetsplug/sonosupdate.py b/libs/beetsplug/sonosupdate.py new file mode 100644 index 00000000..56a315a1 --- /dev/null +++ b/libs/beetsplug/sonosupdate.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2018, Tobias Sauerwein. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates a Sonos library whenever the beets library is changed. +This is based on the Kodi Update plugin. +""" +from __future__ import division, absolute_import, print_function + +from beets.plugins import BeetsPlugin +import soco + + +class SonosUpdate(BeetsPlugin): + def __init__(self): + super(SonosUpdate, self).__init__() + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update""" + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to a Sonos + controler. 
+ """ + self._log.info(u'Requesting a Sonos library update...') + + device = soco.discovery.any_soco() + + if device: + device.music_library.start_library_update() + else: + self._log.warning(u'Could not find a Sonos device.') + return + + self._log.info(u'Sonos update triggered') diff --git a/libs/beetsplug/spotify.py b/libs/beetsplug/spotify.py index 081a027f..36231f29 100644 --- a/libs/beetsplug/spotify.py +++ b/libs/beetsplug/spotify.py @@ -63,8 +63,8 @@ class SpotifyPlugin(BeetsPlugin): self.config['show_failures'].set(True) if self.config['mode'].get() not in ['list', 'open']: - self._log.warn(u'{0} is not a valid mode', - self.config['mode'].get()) + self._log.warning(u'{0} is not a valid mode', + self.config['mode'].get()) return False self.opts = opts @@ -124,9 +124,8 @@ class SpotifyPlugin(BeetsPlugin): # Apply market filter if requested region_filter = self.config['region_filter'].get() if region_filter: - r_data = filter( - lambda x: region_filter in x['available_markets'], r_data - ) + r_data = [x for x in r_data if region_filter + in x['available_markets']] # Simplest, take the first result chosen_result = None @@ -155,15 +154,15 @@ class SpotifyPlugin(BeetsPlugin): self._log.info(u'track: {0}', track) self._log.info(u'') else: - self._log.warn(u'{0} track(s) did not match a Spotify ID;\n' - u'use --show-failures to display', - failure_count) + self._log.warning(u'{0} track(s) did not match a Spotify ID;\n' + u'use --show-failures to display', + failure_count) return results def output_results(self, results): if results: - ids = map(lambda x: x['id'], results) + ids = [x['id'] for x in results] if self.config['mode'].get() == "open": self._log.info(u'Attempting to open Spotify with playlist') spotify_url = self.playlist_partial + ",".join(ids) @@ -171,6 +170,6 @@ class SpotifyPlugin(BeetsPlugin): else: for item in ids: - print(unicode.encode(self.open_url + item)) + print(self.open_url + item) else: - self._log.warn(u'No Spotify tracks found from 
beets query') + self._log.warning(u'No Spotify tracks found from beets query') diff --git a/libs/beetsplug/the.py b/libs/beetsplug/the.py index 6bed4c6e..cfb583ce 100644 --- a/libs/beetsplug/the.py +++ b/libs/beetsplug/the.py @@ -54,14 +54,14 @@ class ThePlugin(BeetsPlugin): self._log.error(u'invalid pattern: {0}', p) else: if not (p.startswith('^') or p.endswith('$')): - self._log.warn(u'warning: \"{0}\" will not ' - u'match string start/end', p) + self._log.warning(u'warning: \"{0}\" will not ' + u'match string start/end', p) if self.config['a']: self.patterns = [PATTERN_A] + self.patterns if self.config['the']: self.patterns = [PATTERN_THE] + self.patterns if not self.patterns: - self._log.warn(u'no patterns defined!') + self._log.warning(u'no patterns defined!') def unthe(self, text, pattern): """Moves pattern in the path format string or strips it @@ -81,7 +81,7 @@ class ThePlugin(BeetsPlugin): if self.config['strip']: return r else: - fmt = self.config['format'].get(unicode) + fmt = self.config['format'].as_str() return fmt.format(r, t.strip()).strip() else: return u'' diff --git a/libs/beetsplug/thumbnails.py b/libs/beetsplug/thumbnails.py index 0e7fbc6e..04845e88 100644 --- a/libs/beetsplug/thumbnails.py +++ b/libs/beetsplug/thumbnails.py @@ -35,6 +35,7 @@ from beets.plugins import BeetsPlugin from beets.ui import Subcommand, decargs from beets import util from beets.util.artresizer import ArtResizer, get_im_version, get_pil_version +import six BASE_DIR = os.path.join(BaseDirectory.xdg_cache_home, "thumbnails") @@ -162,15 +163,16 @@ class ThumbnailsPlugin(BeetsPlugin): See http://standards.freedesktop.org/thumbnail-spec/latest/x227.html """ uri = self.get_uri(path) - hash = md5(uri).hexdigest() - return b"{0}.png".format(hash) + hash = md5(uri.encode('utf-8')).hexdigest() + return util.bytestring_path("{0}.png".format(hash)) def add_tags(self, album, image_path): """Write required metadata to the thumbnail See 
http://standards.freedesktop.org/thumbnail-spec/latest/x142.html """ + mtime = os.stat(album.artpath).st_mtime metadata = {"Thumb::URI": self.get_uri(album.artpath), - "Thumb::MTime": unicode(os.stat(album.artpath).st_mtime)} + "Thumb::MTime": six.text_type(mtime)} try: self.write_metadata(image_path, metadata) except Exception: @@ -183,7 +185,8 @@ class ThumbnailsPlugin(BeetsPlugin): return artfile = os.path.split(album.artpath)[1] with open(outfilename, 'w') as f: - f.write(b"[Desktop Entry]\nIcon=./{0}".format(artfile)) + f.write('[Desktop Entry]\n') + f.write('Icon=./{0}'.format(artfile.decode('utf-8'))) f.close() self._log.debug(u"Wrote file {0}", util.displayable_path(outfilename)) @@ -232,7 +235,7 @@ def copy_c_string(c_string): # work. A more surefire way would be to allocate a ctypes buffer and copy # the data with `memcpy` or somesuch. s = ctypes.cast(c_string, ctypes.c_char_p).value - return '' + s + return b'' + s class GioURI(URIGetter): @@ -271,8 +274,6 @@ class GioURI(URIGetter): try: uri_ptr = self.libgio.g_file_get_uri(g_file_ptr) - except: - raise finally: self.libgio.g_object_unref(g_file_ptr) if not uri_ptr: @@ -282,8 +283,12 @@ class GioURI(URIGetter): try: uri = copy_c_string(uri_ptr) - except: - raise finally: self.libgio.g_free(uri_ptr) - return uri + + try: + return uri.decode(util._fsencoding()) + except UnicodeDecodeError: + raise RuntimeError( + "Could not decode filename from GIO: {!r}".format(uri) + ) diff --git a/libs/beetsplug/web/__init__.py b/libs/beetsplug/web/__init__.py index 67d99db6..3cf43ed5 100644 --- a/libs/beetsplug/web/__init__.py +++ b/libs/beetsplug/web/__init__.py @@ -24,7 +24,9 @@ import flask from flask import g from werkzeug.routing import BaseConverter, PathConverter import os +from unidecode import unidecode import json +import base64 # Utilities. 
@@ -37,7 +39,15 @@ def _rep(obj, expand=False): out = dict(obj) if isinstance(obj, beets.library.Item): - del out['path'] + if app.config.get('INCLUDE_PATHS', False): + out['path'] = util.displayable_path(out['path']) + else: + del out['path'] + + # Filter all bytes attributes and convert them to strings. + for key, value in out.items(): + if isinstance(out[key], bytes): + out[key] = base64.b64encode(value).decode('ascii') # Get the size (in bytes) of the backing file. This is useful # for the Tomahawk resolver API. @@ -55,11 +65,13 @@ def _rep(obj, expand=False): return out -def json_generator(items, root): +def json_generator(items, root, expand=False): """Generator that dumps list of beets Items or Albums as JSON :param root: root key for JSON :param items: list of :class:`Item` or :class:`Album` to dump + :param expand: If true every :class:`Album` contains its items in the json + representation :returns: generator that yields strings """ yield '{"%s":[' % root @@ -69,10 +81,16 @@ def json_generator(items, root): first = False else: yield ',' - yield json.dumps(_rep(item)) + yield json.dumps(_rep(item, expand=expand)) yield ']}' +def is_expand(): + """Returns whether the current request is for an expanded response.""" + + return flask.request.args.get('expand') is not None + + def resource(name): """Decorates a function to handle RESTful HTTP requests for a resource. 
""" @@ -82,7 +100,7 @@ def resource(name): entities = [entity for entity in entities if entity] if len(entities) == 1: - return flask.jsonify(_rep(entities[0])) + return flask.jsonify(_rep(entities[0], expand=is_expand())) elif entities: return app.response_class( json_generator(entities, root=name), @@ -101,7 +119,10 @@ def resource_query(name): def make_responder(query_func): def responder(queries): return app.response_class( - json_generator(query_func(queries), root='results'), + json_generator( + query_func(queries), + root='results', expand=is_expand() + ), mimetype='application/json' ) responder.__name__ = 'query_{0}'.format(name) @@ -116,7 +137,7 @@ def resource_list(name): def make_responder(list_all): def responder(): return app.response_class( - json_generator(list_all(), root=name), + json_generator(list_all(), root=name, expand=is_expand()), mimetype='application/json' ) responder.__name__ = 'all_{0}'.format(name) @@ -162,11 +183,16 @@ class QueryConverter(PathConverter): return ','.join(value) +class EverythingConverter(PathConverter): + regex = '.*?' + + # Flask setup. app = flask.Flask(__name__) app.url_map.converters['idlist'] = IdListConverter app.url_map.converters['query'] = QueryConverter +app.url_map.converters['everything'] = EverythingConverter @app.before_request @@ -192,9 +218,34 @@ def all_items(): @app.route('/item/<int:item_id>/file') def item_file(item_id): item = g.lib.get_item(item_id) - response = flask.send_file(item.path, as_attachment=True, - attachment_filename=os.path.basename(item.path)) - response.headers['Content-Length'] = os.path.getsize(item.path) + + # On Windows under Python 2, Flask wants a Unicode path. On Python 3, it + # *always* wants a Unicode path. 
+ if os.name == 'nt': + item_path = util.syspath(item.path) + else: + item_path = util.py3_path(item.path) + + try: + unicode_item_path = util.text_string(item.path) + except (UnicodeDecodeError, UnicodeEncodeError): + unicode_item_path = util.displayable_path(item.path) + + base_filename = os.path.basename(unicode_item_path) + try: + # Imitate http.server behaviour + base_filename.encode("latin-1", "strict") + except UnicodeEncodeError: + safe_filename = unidecode(base_filename) + else: + safe_filename = base_filename + + response = flask.send_file( + item_path, + as_attachment=True, + attachment_filename=safe_filename + ) + response.headers['Content-Length'] = os.path.getsize(item_path) return response @@ -204,6 +255,16 @@ def item_query(queries): return g.lib.items(queries) +@app.route('/item/path/<everything:path>') +def item_at_path(path): + query = beets.library.PathQuery('path', path.encode('utf-8')) + item = g.lib.items(query).get() + if item: + return flask.jsonify(_rep(item)) + else: + return flask.abort(404) + + @app.route('/item/values/<string:key>') def item_unique_field_values(key): sort_key = flask.request.args.get('sort_key', key) @@ -239,8 +300,8 @@ def album_query(queries): @app.route('/album/<int:album_id>/art') def album_art(album_id): album = g.lib.get_album(album_id) - if album.artpath: - return flask.send_file(album.artpath) + if album and album.artpath: + return flask.send_file(album.artpath.decode()) else: return flask.abort(404) @@ -295,6 +356,9 @@ class WebPlugin(BeetsPlugin): 'host': u'127.0.0.1', 'port': 8337, 'cors': '', + 'cors_supports_credentials': False, + 'reverse_proxy': False, + 'include_paths': False, }) def commands(self): @@ -310,6 +374,11 @@ class WebPlugin(BeetsPlugin): self.config['port'] = int(args.pop(0)) app.config['lib'] = lib + # Normalizes json output + app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False + + app.config['INCLUDE_PATHS'] = self.config['include_paths'] + # Enable CORS if required. 
if self.config['cors']: self._log.info(u'Enabling CORS with origin: {0}', @@ -319,10 +388,56 @@ class WebPlugin(BeetsPlugin): app.config['CORS_RESOURCES'] = { r"/*": {"origins": self.config['cors'].get(str)} } - CORS(app) + CORS( + app, + supports_credentials=self.config[ + 'cors_supports_credentials' + ].get(bool) + ) + + # Allow serving behind a reverse proxy + if self.config['reverse_proxy']: + app.wsgi_app = ReverseProxied(app.wsgi_app) + # Start the web application. - app.run(host=self.config['host'].get(unicode), + app.run(host=self.config['host'].as_str(), port=self.config['port'].get(int), debug=opts.debug, threaded=True) cmd.func = func return [cmd] + + +class ReverseProxied(object): + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. + + In nginx: + location /myprefix { + proxy_pass http://192.168.0.1:5001; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + + From: http://flask.pocoo.org/snippets/35/ + + :param app: the WSGI application + ''' + def __init__(self, app): + self.app = app + + def __call__(self, environ, start_response): + script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] + + scheme = environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + return self.app(environ, start_response) diff --git a/libs/beetsplug/web/static/beets.js b/libs/beetsplug/web/static/beets.js index 757f2cda..51985c18 100644 --- a/libs/beetsplug/web/static/beets.js +++ b/libs/beetsplug/web/static/beets.js @@ -4,7 +4,7 @@ var timeFormat = function(secs) { return 
'0:00'; } secs = Math.round(secs); - var mins = '' + Math.round(secs / 60); + var mins = '' + Math.floor(secs / 60); secs = '' + (secs % 60); if (secs.length < 2) { secs = '0' + secs; @@ -147,7 +147,7 @@ var BeetsRouter = Backbone.Router.extend({ }, itemQuery: function(query) { var queryURL = query.split(/\s+/).map(encodeURIComponent).join('/'); - $.getJSON('/item/query/' + queryURL, function(data) { + $.getJSON('item/query/' + queryURL, function(data) { var models = _.map( data['results'], function(d) { return new Item(d); } @@ -161,7 +161,7 @@ var router = new BeetsRouter(); // Model. var Item = Backbone.Model.extend({ - urlRoot: '/item' + urlRoot: 'item' }); var Items = Backbone.Collection.extend({ model: Item @@ -264,7 +264,7 @@ var AppView = Backbone.View.extend({ $('#extra-detail').empty().append(extraDetailView.render().el); }, playItem: function(item) { - var url = '/item/' + item.get('id') + '/file'; + var url = 'item/' + item.get('id') + '/file'; $('#player audio').attr('src', url); $('#player audio').get(0).play(); diff --git a/libs/beetsplug/web/templates/index.html b/libs/beetsplug/web/templates/index.html index 7c37c82d..0fdd46d1 100644 --- a/libs/beetsplug/web/templates/index.html +++ b/libs/beetsplug/web/templates/index.html @@ -82,7 +82,7 @@ <% } %> <dt>File</dt> <dd> - <a target="_blank" class="download" href="/item/<%= id %>/file">download</a> + <a target="_blank" class="download" href="item/<%= id %>/file">download</a> </dd> <% if (lyrics) { %> <dt>Lyrics</dt> diff --git a/libs/beetsplug/zero.py b/libs/beetsplug/zero.py index d20f7616..022c2c72 100644 --- a/libs/beetsplug/zero.py +++ b/libs/beetsplug/zero.py @@ -16,125 +16,148 @@ """ Clears tag fields in media files.""" from __future__ import division, absolute_import, print_function +import six import re + from beets.plugins import BeetsPlugin from beets.mediafile import MediaFile from beets.importer import action +from beets.ui import Subcommand, decargs, input_yn from beets.util import confit 
__author__ = 'baobab@heresiarch.info' -__version__ = '0.10' class ZeroPlugin(BeetsPlugin): - - _instance = None - def __init__(self): super(ZeroPlugin, self).__init__() - # Listeners. self.register_listener('write', self.write_event) self.register_listener('import_task_choice', self.import_task_choice_event) self.config.add({ + 'auto': True, 'fields': [], 'keep_fields': [], 'update_database': False, }) - self.patterns = {} + self.fields_to_progs = {} self.warned = False - # We'll only handle `fields` or `keep_fields`, but not both. + """Read the bulk of the config into `self.fields_to_progs`. + After construction, `fields_to_progs` contains all the fields that + should be zeroed as keys and maps each of those to a list of compiled + regexes (progs) as values. + A field is zeroed if its value matches one of the associated progs. If + progs is empty, then the associated field is always zeroed. + """ if self.config['fields'] and self.config['keep_fields']: - self._log.warn(u'cannot blacklist and whitelist at the same time') - + self._log.warning( + u'cannot blacklist and whitelist at the same time' + ) # Blacklist mode. - if self.config['fields']: - self.validate_config('fields') + elif self.config['fields']: for field in self.config['fields'].as_str_seq(): - self.set_pattern(field) - + self._set_pattern(field) # Whitelist mode. elif self.config['keep_fields']: - self.validate_config('keep_fields') - for field in MediaFile.fields(): - if field in self.config['keep_fields'].as_str_seq(): - continue - self.set_pattern(field) + if (field not in self.config['keep_fields'].as_str_seq() and + # These fields should always be preserved. + field not in ('id', 'path', 'album_id')): + self._set_pattern(field) - # These fields should always be preserved. 
- for key in ('id', 'path', 'album_id'): - if key in self.patterns: - del self.patterns[key] + def commands(self): + zero_command = Subcommand('zero', help='set fields to null') - def validate_config(self, mode): - """Check whether fields in the configuration are valid. + def zero_fields(lib, opts, args): + if not decargs(args) and not input_yn( + u"Remove fields for all items? (Y/n)", + True): + return + for item in lib.items(decargs(args)): + self.process_item(item) - `mode` should either be "fields" or "keep_fields", indicating - the section of the configuration to validate. + zero_command.func = zero_fields + return [zero_command] + + def _set_pattern(self, field): + """Populate `self.fields_to_progs` for a given field. + Do some sanity checks then compile the regexes. """ - for field in self.config[mode].as_str_seq(): - if field not in MediaFile.fields(): - self._log.error(u'invalid field: {0}', field) - continue - if mode == 'fields' and field in ('id', 'path', 'album_id'): - self._log.warn(u'field \'{0}\' ignored, zeroing ' - u'it would be dangerous', field) - continue - - def set_pattern(self, field): - """Set a field in `self.patterns` to a string list corresponding to - the configuration, or `True` if the field has no specific - configuration. 
- """ - try: - self.patterns[field] = self.config[field].as_str_seq() - except confit.NotFoundError: - # Matches everything - self.patterns[field] = True + if field not in MediaFile.fields(): + self._log.error(u'invalid field: {0}', field) + elif field in ('id', 'path', 'album_id'): + self._log.warning(u'field \'{0}\' ignored, zeroing ' + u'it would be dangerous', field) + else: + try: + for pattern in self.config[field].as_str_seq(): + prog = re.compile(pattern, re.IGNORECASE) + self.fields_to_progs.setdefault(field, []).append(prog) + except confit.NotFoundError: + # Matches everything + self.fields_to_progs[field] = [] def import_task_choice_event(self, session, task): - """Listen for import_task_choice event.""" if task.choice_flag == action.ASIS and not self.warned: - self._log.warn(u'cannot zero in \"as-is\" mode') + self._log.warning(u'cannot zero in \"as-is\" mode') self.warned = True # TODO request write in as-is mode - @classmethod - def match_patterns(cls, field, patterns): - """Check if field (as string) is matching any of the patterns in - the list. - """ - if patterns is True: - return True - for p in patterns: - if re.search(p, unicode(field), flags=re.IGNORECASE): - return True - return False - def write_event(self, item, path, tags): - """Set values in tags to `None` if the key and value are matched - by `self.patterns`. - """ - if not self.patterns: - self._log.warn(u'no fields, nothing to do') - return + if self.config['auto']: + self.set_fields(item, tags) - for field, patterns in self.patterns.items(): + def set_fields(self, item, tags): + """Set values in `tags` to `None` if the field is in + `self.fields_to_progs` and any of the corresponding `progs` matches the + field value. + Also update the `item` itself if `update_database` is set in the + config. 
+ """ + fields_set = False + + if not self.fields_to_progs: + self._log.warning(u'no fields, nothing to do') + return False + + for field, progs in self.fields_to_progs.items(): if field in tags: value = tags[field] - match = self.match_patterns(tags[field], patterns) + match = _match_progs(tags[field], progs) else: value = '' - match = patterns is True + match = not progs if match: + fields_set = True self._log.debug(u'{0}: {1} -> None', field, value) tags[field] = None if self.config['update_database']: item[field] = None + + return fields_set + + def process_item(self, item): + tags = dict(item) + + if self.set_fields(item, tags): + item.write(tags=tags) + if self.config['update_database']: + item.store(fields=tags) + + +def _match_progs(value, progs): + """Check if `value` (as string) is matching any of the compiled regexes in + the `progs` list. + """ + if not progs: + return True + for prog in progs: + if prog.search(six.text_type(value)): + return True + return False diff --git a/libs/bin/beet.exe b/libs/bin/beet.exe new file mode 100644 index 00000000..2028d200 Binary files /dev/null and b/libs/bin/beet.exe differ diff --git a/libs/bin/mid3cp b/libs/bin/mid3cp new file mode 100644 index 00000000..2020a753 --- /dev/null +++ b/libs/bin/mid3cp @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +import sys + +from mutagen._tools.mid3cp import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/mid3iconv b/libs/bin/mid3iconv new file mode 100644 index 00000000..b15ad500 --- /dev/null +++ b/libs/bin/mid3iconv @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import sys + +from mutagen._tools.mid3iconv import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/mid3v2 b/libs/bin/mid3v2 new file mode 100644 index 00000000..34064886 --- /dev/null +++ b/libs/bin/mid3v2 @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import sys + +from mutagen._tools.mid3v2 import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/moggsplit b/libs/bin/moggsplit new file mode 100644 index 00000000..005789e4 --- /dev/null +++ b/libs/bin/moggsplit @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +import sys + +from mutagen._tools.moggsplit import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/mutagen-inspect b/libs/bin/mutagen-inspect new file mode 100644 index 00000000..2e10d131 --- /dev/null +++ b/libs/bin/mutagen-inspect @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import sys + +from mutagen._tools.mutagen_inspect import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/mutagen-pony b/libs/bin/mutagen-pony new file mode 100644 index 00000000..a03cd90f --- /dev/null +++ b/libs/bin/mutagen-pony @@ -0,0 +1,16 @@ +#!h:\src\env\nzbtomedia\scripts\python.exe +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +import sys + +from mutagen._tools.mutagen_pony import entry_point + + +if __name__ == "__main__": + sys.exit(entry_point()) diff --git a/libs/bin/unidecode.exe b/libs/bin/unidecode.exe new file mode 100644 index 00000000..07a0e413 Binary files /dev/null and b/libs/bin/unidecode.exe differ diff --git a/libs/colorama/__init__.py b/libs/colorama/__init__.py index 670e6b39..2a3bf471 100644 --- a/libs/colorama/__init__.py +++ b/libs/colorama/__init__.py @@ -3,5 +3,4 @@ from .initialise import init, deinit, reinit, colorama_text from .ansi import Fore, Back, Style, Cursor from .ansitowin32 import AnsiToWin32 -__version__ = '0.3.7' - +__version__ = '0.4.1' diff --git a/libs/colorama/ansitowin32.py b/libs/colorama/ansitowin32.py index b7ff6f21..359c92be 100644 --- a/libs/colorama/ansitowin32.py +++ b/libs/colorama/ansitowin32.py @@ -13,14 +13,6 @@ if windll is not None: winterm = WinTerm() -def is_stream_closed(stream): - return not hasattr(stream, 'closed') or stream.closed - - -def is_a_tty(stream): - return hasattr(stream, 'isatty') and stream.isatty() - - class StreamWrapper(object): ''' Wraps a stream (such as stdout), acting as a transparent proxy for all @@ -36,9 +28,38 @@ class StreamWrapper(object): def __getattr__(self, name): return getattr(self.__wrapped, name) + def __enter__(self, *args, **kwargs): + # special method lookup bypasses __getattr__/__getattribute__, see + # https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit + # thus, contextlib magic methods are not proxied via __getattr__ + return self.__wrapped.__enter__(*args, **kwargs) + + def __exit__(self, *args, **kwargs): + return self.__wrapped.__exit__(*args, **kwargs) + def write(self, text): self.__convertor.write(text) + def isatty(self): + stream = self.__wrapped + if 'PYCHARM_HOSTED' in os.environ: + if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__): + return True + try: + stream_isatty = stream.isatty + except AttributeError: + return 
False + else: + return stream_isatty() + + @property + def closed(self): + stream = self.__wrapped + try: + return stream.closed + except AttributeError: + return True + class AnsiToWin32(object): ''' @@ -46,8 +67,8 @@ class AnsiToWin32(object): sequences from the text, and if outputting to a tty, will convert them into win32 function calls. ''' - ANSI_CSI_RE = re.compile('\001?\033\[((?:\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer - ANSI_OSC_RE = re.compile('\001?\033\]((?:.|;)*?)(\x07)\002?') # Operating System Command + ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer + ANSI_OSC_RE = re.compile('\001?\033\\]((?:.|;)*?)(\x07)\002?') # Operating System Command def __init__(self, wrapped, convert=None, strip=None, autoreset=False): # The wrapped stream (normally sys.stdout or sys.stderr) @@ -68,12 +89,12 @@ class AnsiToWin32(object): # should we strip ANSI sequences from our output? if strip is None: - strip = conversion_supported or (not is_stream_closed(wrapped) and not is_a_tty(wrapped)) + strip = conversion_supported or (not self.stream.closed and not self.stream.isatty()) self.strip = strip # should we should convert ANSI sequences into win32 calls? 
if convert is None: - convert = conversion_supported and not is_stream_closed(wrapped) and is_a_tty(wrapped) + convert = conversion_supported and not self.stream.closed and self.stream.isatty() self.convert = convert # dict of ansi codes to win32 functions and parameters @@ -149,7 +170,7 @@ class AnsiToWin32(object): def reset_all(self): if self.convert: self.call_win32('m', (0,)) - elif not self.strip and not is_stream_closed(self.wrapped): + elif not self.strip and not self.stream.closed: self.wrapped.write(Style.RESET_ALL) diff --git a/libs/colorama/initialise.py b/libs/colorama/initialise.py index 834962a3..430d0668 100644 --- a/libs/colorama/initialise.py +++ b/libs/colorama/initialise.py @@ -78,5 +78,3 @@ def wrap_stream(stream, convert, strip, autoreset, wrap): if wrapper.should_wrap(): stream = wrapper.stream return stream - - diff --git a/libs/colorama/win32.py b/libs/colorama/win32.py index 3d1d2f2d..c2d83603 100644 --- a/libs/colorama/win32.py +++ b/libs/colorama/win32.py @@ -83,33 +83,31 @@ else: ] _FillConsoleOutputAttribute.restype = wintypes.BOOL - _SetConsoleTitleW = windll.kernel32.SetConsoleTitleA + _SetConsoleTitleW = windll.kernel32.SetConsoleTitleW _SetConsoleTitleW.argtypes = [ - wintypes.LPCSTR + wintypes.LPCWSTR ] _SetConsoleTitleW.restype = wintypes.BOOL - handles = { - STDOUT: _GetStdHandle(STDOUT), - STDERR: _GetStdHandle(STDERR), - } - - def winapi_test(): - handle = handles[STDOUT] + def _winapi_test(handle): csbi = CONSOLE_SCREEN_BUFFER_INFO() success = _GetConsoleScreenBufferInfo( handle, byref(csbi)) return bool(success) + def winapi_test(): + return any(_winapi_test(h) for h in + (_GetStdHandle(STDOUT), _GetStdHandle(STDERR))) + def GetConsoleScreenBufferInfo(stream_id=STDOUT): - handle = handles[stream_id] + handle = _GetStdHandle(stream_id) csbi = CONSOLE_SCREEN_BUFFER_INFO() success = _GetConsoleScreenBufferInfo( handle, byref(csbi)) return csbi def SetConsoleTextAttribute(stream_id, attrs): - handle = handles[stream_id] + handle 
= _GetStdHandle(stream_id) return _SetConsoleTextAttribute(handle, attrs) def SetConsoleCursorPosition(stream_id, position, adjust=True): @@ -127,11 +125,11 @@ else: adjusted_position.Y += sr.Top adjusted_position.X += sr.Left # Resume normal processing - handle = handles[stream_id] + handle = _GetStdHandle(stream_id) return _SetConsoleCursorPosition(handle, adjusted_position) def FillConsoleOutputCharacter(stream_id, char, length, start): - handle = handles[stream_id] + handle = _GetStdHandle(stream_id) char = c_char(char.encode()) length = wintypes.DWORD(length) num_written = wintypes.DWORD(0) @@ -142,7 +140,7 @@ else: def FillConsoleOutputAttribute(stream_id, attr, length, start): ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' - handle = handles[stream_id] + handle = _GetStdHandle(stream_id) attribute = wintypes.WORD(attr) length = wintypes.DWORD(length) num_written = wintypes.DWORD(0) diff --git a/libs/colorama/winterm.py b/libs/colorama/winterm.py index 60309d3c..0fdb4ec4 100644 --- a/libs/colorama/winterm.py +++ b/libs/colorama/winterm.py @@ -44,6 +44,7 @@ class WinTerm(object): def reset_all(self, on_stderr=None): self.set_attrs(self._default) self.set_console(attrs=self._default) + self._light = 0 def fore(self, fore=None, light=False, on_stderr=False): if fore is None: @@ -122,12 +123,15 @@ class WinTerm(object): if mode == 0: from_coord = csbi.dwCursorPosition cells_to_erase = cells_in_screen - cells_before_cursor - if mode == 1: + elif mode == 1: from_coord = win32.COORD(0, 0) cells_to_erase = cells_before_cursor elif mode == 2: from_coord = win32.COORD(0, 0) cells_to_erase = cells_in_screen + else: + # invalid mode + return # fill the entire screen with blanks win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) # now set the buffer's attributes accordingly @@ -147,12 +151,15 @@ class WinTerm(object): if mode == 0: from_coord = csbi.dwCursorPosition cells_to_erase = 
csbi.dwSize.X - csbi.dwCursorPosition.X - if mode == 1: + elif mode == 1: from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) cells_to_erase = csbi.dwCursorPosition.X elif mode == 2: from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) cells_to_erase = csbi.dwSize.X + else: + # invalid mode + return # fill the entire screen with blanks win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) # now set the buffer's attributes accordingly diff --git a/libs/jellyfish/__init__.py b/libs/jellyfish/__init__.py index 78345699..ca124f2a 100644 --- a/libs/jellyfish/__init__.py +++ b/libs/jellyfish/__init__.py @@ -1,4 +1,6 @@ try: from .cjellyfish import * # noqa + library = "C" except ImportError: from ._jellyfish import * # noqa + library = "Python" diff --git a/libs/jellyfish/_jellyfish.py b/libs/jellyfish/_jellyfish.py index a596bb73..05dade4f 100644 --- a/libs/jellyfish/_jellyfish.py +++ b/libs/jellyfish/_jellyfish.py @@ -1,6 +1,6 @@ import unicodedata from collections import defaultdict -from .compat import _range, _zip_longest, _no_bytes_err +from .compat import _range, _zip_longest, IS_PY3 from .porter import Stemmer @@ -8,9 +8,16 @@ def _normalize(s): return unicodedata.normalize('NFKD', s) +def _check_type(s): + if IS_PY3 and not isinstance(s, str): + raise TypeError('expected str or unicode, got %s' % type(s).__name__) + elif not IS_PY3 and not isinstance(s, unicode): + raise TypeError('expected unicode, got %s' % type(s).__name__) + + def levenshtein_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) if s1 == s2: return 0 @@ -36,14 +43,14 @@ def levenshtein_distance(s1, s2): def _jaro_winkler(ying, yang, long_tolerance, winklerize): - if isinstance(ying, bytes) or isinstance(yang, bytes): - raise TypeError(_no_bytes_err) + _check_type(ying) + _check_type(yang) ying_len = len(ying) yang_len = len(yang) if not ying_len or not yang_len: - return 0 + return 0.0 
min_len = max(ying_len, yang_len) search_range = (min_len // 2) - 1 @@ -66,7 +73,7 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize): # short circuit if no characters match if not common_chars: - return 0 + return 0.0 # count transpositions k = trans_count = 0 @@ -106,8 +113,8 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize): def damerau_levenshtein_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) len1 = len(s1) len2 = len(s2) @@ -155,25 +162,27 @@ def jaro_winkler(s1, s2, long_tolerance=False): def soundex(s): + + _check_type(s) + if not s: - return s - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + return '' s = _normalize(s) + s = s.upper() - replacements = (('bfpv', '1'), - ('cgjkqsxz', '2'), - ('dt', '3'), - ('l', '4'), - ('mn', '5'), - ('r', '6')) + replacements = (('BFPV', '1'), + ('CGJKQSXZ', '2'), + ('DT', '3'), + ('L', '4'), + ('MN', '5'), + ('R', '6')) result = [s[0]] count = 1 # find would-be replacment for first character for lset, sub in replacements: - if s[0].lower() in lset: + if s[0] in lset: last = sub break else: @@ -181,7 +190,7 @@ def soundex(s): for letter in s[1:]: for lset, sub in replacements: - if letter.lower() in lset: + if letter in lset: if sub != last: result.append(sub) count += 1 @@ -197,8 +206,8 @@ def soundex(s): def hamming_distance(s1, s2): - if isinstance(s1, bytes) or isinstance(s2, bytes): - raise TypeError(_no_bytes_err) + _check_type(s1) + _check_type(s2) # ensure length of s1 >= s2 if len(s2) > len(s1): @@ -214,8 +223,9 @@ def hamming_distance(s1, s2): def nysiis(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + + _check_type(s) + if not s: return '' @@ -303,8 +313,8 @@ def nysiis(s): def match_rating_codex(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) + s = s.upper() codex = [] @@ -368,8 +378,7 @@ def match_rating_comparison(s1, s2): def 
metaphone(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) result = [] @@ -457,8 +466,9 @@ def metaphone(s): elif c == 'w': if i == 0 and next == 'h': i += 1 - next = s[i+1] - if next in 'aeiou': + if nextnext in 'aeiou' or nextnext == '*****': + result.append('w') + elif next in 'aeiou' or next == '*****': result.append('w') elif c == 'x': if i == 0: @@ -484,6 +494,6 @@ def metaphone(s): def porter_stem(s): - if isinstance(s, bytes): - raise TypeError(_no_bytes_err) + _check_type(s) + return Stemmer(s).stem() diff --git a/libs/jellyfish/cjellyfish.pyd b/libs/jellyfish/cjellyfish.pyd deleted file mode 100644 index fb20e5d7..00000000 Binary files a/libs/jellyfish/cjellyfish.pyd and /dev/null differ diff --git a/libs/jellyfish/compat.py b/libs/jellyfish/compat.py index b5e09792..180283d1 100644 --- a/libs/jellyfish/compat.py +++ b/libs/jellyfish/compat.py @@ -6,8 +6,6 @@ IS_PY3 = sys.version_info[0] == 3 if IS_PY3: _range = range _zip_longest = itertools.zip_longest - _no_bytes_err = 'expected str, got bytes' else: _range = xrange _zip_longest = itertools.izip_longest - _no_bytes_err = 'expected unicode, got str' diff --git a/libs/jellyfish/test.py b/libs/jellyfish/test.py index 72ef9344..dea87c25 100644 --- a/libs/jellyfish/test.py +++ b/libs/jellyfish/test.py @@ -112,7 +112,6 @@ if platform.python_implementation() == 'CPython': # this segfaulted on 0.1.2 assert [[jf.match_rating_comparison(h1, h2) for h1 in sha1s] for h2 in sha1s] - def test_damerau_levenshtein_unicode_segfault(): # unfortunate difference in behavior between Py & C versions from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl diff --git a/libs/munkres.py b/libs/munkres.py index 187333b3..3a70ff06 100644 --- a/libs/munkres.py +++ b/libs/munkres.py @@ -14,7 +14,7 @@ useful for solving the Assignment Problem. 
Assignment Problem ================== -Let *C* be an *n*\ x\ *n* matrix representing the costs of each of *n* workers +Let *C* be an *n* by *n* matrix representing the costs of each of *n* workers to perform any of *n* jobs. The assignment problem is to assign jobs to workers in a way that minimizes the total cost. Since each worker can perform only one job and each job can be assigned to only one worker the assignments @@ -23,13 +23,13 @@ represent an independent set of the matrix *C*. One way to generate the optimal set is to create all permutations of the indexes necessary to traverse the matrix so that no row and column are used more than once. For instance, given this matrix (expressed in -Python):: +Python): matrix = [[5, 9, 1], [10, 3, 2], [8, 7, 4]] -You could use this code to generate the traversal indexes:: +You could use this code to generate the traversal indexes: def permute(a, results): if len(a) == 1: @@ -48,7 +48,7 @@ You could use this code to generate the traversal indexes:: results = [] permute(range(len(matrix)), results) # [0, 1, 2] for a 3x3 matrix -After the call to permute(), the results matrix would look like this:: +After the call to permute(), the results matrix would look like this: [[0, 1, 2], [0, 2, 1], @@ -58,13 +58,12 @@ After the call to permute(), the results matrix would look like this:: [2, 1, 0]] You could then use that index matrix to loop over the original cost matrix -and calculate the smallest cost of the combinations:: +and calculate the smallest cost of the combinations: - n = len(matrix) minval = sys.maxsize - for row in range(n): + for indexes in results: cost = 0 - for col in range(n): + for row, col in enumerate(indexes): cost += matrix[row][col] minval = min(cost, minval) @@ -82,23 +81,23 @@ The Munkres algorithm runs in O(*n*\ ^3) time, rather than O(*n*!). This package provides an implementation of that algorithm. This version is based on -http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html. 
+http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html -This version was written for Python by Brian Clapper from the (Ada) algorithm -at the above web site. (The ``Algorithm::Munkres`` Perl version, in CPAN, was +This version was written for Python by Brian Clapper from the algorithm +at the above web site. (The ``Algorithm:Munkres`` Perl version, in CPAN, was clearly adapted from the same web site.) Usage ===== -Construct a Munkres object:: +Construct a Munkres object: from munkres import Munkres m = Munkres() Then use it to compute the lowest cost assignment from a cost matrix. Here's -a sample program:: +a sample program: from munkres import Munkres, print_matrix @@ -115,7 +114,7 @@ a sample program:: print '(%d, %d) -> %d' % (row, column, value) print 'total cost: %d' % total -Running that program produces:: +Running that program produces: Lowest cost through this matrix: [5, 9, 1] @@ -152,7 +151,7 @@ the combination of elements (one from each row and column) that results in the smallest cost. It's also possible to use the algorithm to maximize profit. To do that, however, you have to convert your profit matrix to a cost matrix. The simplest way to do that is to subtract all elements from a -large value. For example:: +large value. For example: from munkres import Munkres, print_matrix @@ -177,7 +176,7 @@ large value. For example:: print 'total profit=%d' % total -Running that program produces:: +Running that program produces: Highest profit through this matrix: [5, 9, 1] @@ -189,27 +188,32 @@ Running that program produces:: total profit=23 The ``munkres`` module provides a convenience method for creating a cost -matrix from a profit matrix. Since it doesn't know whether the matrix contains -floating point numbers, decimals, or integers, you have to provide the +matrix from a profit matrix. By default, it calculates the maximum profit +and subtracts every profit from it to obtain a cost. 
If, however, you +need a more general function, you can provide the conversion function; but the convenience method takes care of the actual -creation of the cost matrix:: +creation of the matrix: import munkres - cost_matrix = munkres.make_cost_matrix(matrix, - lambda cost: sys.maxsize - cost) + cost_matrix = munkres.make_cost_matrix( + matrix, + lambda profit: 1000.0 - math.sqrt(profit)) -So, the above profit-calculation program can be recast as:: +So, the above profit-calculation program can be recast as: from munkres import Munkres, print_matrix, make_cost_matrix matrix = [[5, 9, 1], [10, 3, 2], [8, 7, 4]] - cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost) + cost_matrix = make_cost_matrix(matrix) + # cost_matrix == [[5, 1, 9], + # [0, 7, 8], + # [2, 3, 6]] m = Munkres() indexes = m.compute(cost_matrix) - print_matrix(matrix, msg='Lowest cost through this matrix:') + print_matrix(matrix, msg='Highest profits through this matrix:') total = 0 for row, column in indexes: value = matrix[row][column] @@ -217,6 +221,40 @@ So, the above profit-calculation program can be recast as:: print '(%d, %d) -> %d' % (row, column, value) print 'total profit=%d' % total +Disallowed Assignments +====================== + +You can also mark assignments in your cost or profit matrix as disallowed. +Simply use the munkres.DISALLOWED constant. 
+ + from munkres import Munkres, print_matrix, make_cost_matrix, DISALLOWED + + matrix = [[5, 9, DISALLOWED], + [10, DISALLOWED, 2], + [8, 7, 4]] + cost_matrix = make_cost_matrix(matrix, lambda cost: (sys.maxsize - cost) if + (cost != DISALLOWED) else DISALLOWED) + m = Munkres() + indexes = m.compute(cost_matrix) + print_matrix(matrix, msg='Highest profit through this matrix:') + total = 0 + for row, column in indexes: + value = matrix[row][column] + total += value + print '(%d, %d) -> %d' % (row, column, value) + print 'total profit=%d' % total + +Running this program produces: + + Lowest cost through this matrix: + [ 5, 9, D] + [10, D, 2] + [ 8, 7, 4] + (0, 1) -> 9 + (1, 0) -> 10 + (2, 2) -> 4 + total profit=23 + References ========== @@ -237,37 +275,19 @@ References Copyright and License ===================== -This software is released under a BSD license, adapted from -<http://opensource.org/licenses/bsd-license.php> +Copyright 2008-2016 Brian M. Clapper -Copyright (c) 2008 Brian M. Clapper -All rights reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: + http://www.apache.org/licenses/LICENSE-2.0 -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name "clapper.org" nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ __docformat__ = 'restructuredtext' @@ -283,18 +303,34 @@ import copy # Exports # --------------------------------------------------------------------------- -__all__ = ['Munkres', 'make_cost_matrix'] +__all__ = ['Munkres', 'make_cost_matrix', 'DISALLOWED'] # --------------------------------------------------------------------------- # Globals # --------------------------------------------------------------------------- # Info about the module -__version__ = "1.0.7" +__version__ = "1.0.12" __author__ = "Brian Clapper, bmc@clapper.org" __url__ = "http://software.clapper.org/munkres/" -__copyright__ = "(c) 2008 Brian M. Clapper" -__license__ = "BSD-style license" +__copyright__ = "(c) 2008-2017 Brian M. 
Clapper" +__license__ = "Apache Software License" + +# Constants +class DISALLOWED_OBJ(object): + pass +DISALLOWED = DISALLOWED_OBJ() +DISALLOWED_PRINTVAL = "D" + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + +class UnsolvableMatrix(Exception): + """ + Exception raised for unsolvable matrices + """ + pass # --------------------------------------------------------------------------- # Classes @@ -441,12 +477,19 @@ class Munkres: C = self.C n = self.n for i in range(n): - minval = min(self.C[i]) + vals = [x for x in self.C[i] if x is not DISALLOWED] + if len(vals) == 0: + # All values in this row are DISALLOWED. This matrix is + # unsolvable. + raise UnsolvableMatrix( + "Row {0} is entirely DISALLOWED.".format(i) + ) + minval = min(vals) # Find the minimum value for this row and subtract that minimum # from every element in the row. for j in range(n): - self.C[i][j] -= minval - + if self.C[i][j] is not DISALLOWED: + self.C[i][j] -= minval return 2 def __step2(self): @@ -464,6 +507,7 @@ class Munkres: self.marked[i][j] = 1 self.col_covered[j] = True self.row_covered[i] = True + break self.__clear_covers() return 3 @@ -478,7 +522,7 @@ class Munkres: count = 0 for i in range(n): for j in range(n): - if self.marked[i][j] == 1: + if self.marked[i][j] == 1 and not self.col_covered[j]: self.col_covered[j] = True count += 1 @@ -499,11 +543,11 @@ class Munkres: """ step = 0 done = False - row = -1 - col = -1 + row = 0 + col = 0 star_col = -1 while not done: - (row, col) = self.__find_a_zero() + (row, col) = self.__find_a_zero(row, col) if row < 0: done = True step = 6 @@ -566,12 +610,21 @@ class Munkres: lines. 
""" minval = self.__find_smallest() + events = 0 # track actual changes to matrix for i in range(self.n): for j in range(self.n): + if self.C[i][j] is DISALLOWED: + continue if self.row_covered[i]: self.C[i][j] += minval + events += 1 if not self.col_covered[j]: self.C[i][j] -= minval + events += 1 + if self.row_covered[i] and not self.col_covered[j]: + events -= 2 # change reversed, no real difference + if (events == 0): + raise UnsolvableMatrix("Matrix cannot be solved!") return 4 def __find_smallest(self): @@ -580,20 +633,21 @@ class Munkres: for i in range(self.n): for j in range(self.n): if (not self.row_covered[i]) and (not self.col_covered[j]): - if minval > self.C[i][j]: + if self.C[i][j] is not DISALLOWED and minval > self.C[i][j]: minval = self.C[i][j] return minval - def __find_a_zero(self): + + def __find_a_zero(self, i0=0, j0=0): """Find the first uncovered element with value 0""" row = -1 col = -1 - i = 0 + i = i0 n = self.n done = False while not done: - j = 0 + j = j0 while True: if (self.C[i][j] == 0) and \ (not self.row_covered[i]) and \ @@ -601,11 +655,11 @@ class Munkres: row = i col = j done = True - j += 1 - if j >= n: + j = (j + 1) % n + if j == j0: break - i += 1 - if i >= n: + i = (i + 1) % n + if i == i0: done = True return (row, col) @@ -673,23 +727,24 @@ class Munkres: # Functions # --------------------------------------------------------------------------- -def make_cost_matrix(profit_matrix, inversion_function): +def make_cost_matrix(profit_matrix, inversion_function=None): """ Create a cost matrix from a profit matrix by calling 'inversion_function' to invert each value. The inversion function must take one numeric argument (of any type) and return another numeric argument which is presumed to be the cost inverse - of the original profit. + of the original profit. In case the inversion function is not provided, + calculate it as max(matrix) - matrix. This is a static method. Call it like this: - .. python:: + .. 
python: cost_matrix = Munkres.make_cost_matrix(matrix, inversion_func) For example: - .. python:: + .. python: cost_matrix = Munkres.make_cost_matrix(matrix, lambda x : sys.maxsize - x) @@ -698,11 +753,16 @@ def make_cost_matrix(profit_matrix, inversion_function): The matrix to convert from a profit to a cost matrix inversion_function : function - The function to use to invert each entry in the profit matrix + The function to use to invert each entry in the profit matrix. + In case it is not provided, calculate it as max(matrix) - matrix. :rtype: list of lists :return: The converted matrix """ + if not inversion_function: + maximum = max(max(row) for row in profit_matrix) + inversion_function = lambda x: maximum - x + cost_matrix = [] for row in profit_matrix: cost_matrix.append([inversion_function(value) for value in row]) @@ -728,16 +788,21 @@ def print_matrix(matrix, msg=None): width = 0 for row in matrix: for val in row: - width = max(width, int(math.log10(val)) + 1) + if val is DISALLOWED: + val = DISALLOWED_PRINTVAL + width = max(width, len(str(val))) # Make the format string - format = '%%%dd' % width + format = ('%%%d' % width) # Print the matrix for row in matrix: sep = '[' for val in row: - sys.stdout.write(sep + format % val) + if val is DISALLOWED: + formatted = ((format + 's') % DISALLOWED_PRINTVAL) + else: formatted = ((format + 'd') % val) + sys.stdout.write(sep + formatted) sep = ', ' sys.stdout.write(']\n') @@ -771,7 +836,21 @@ if __name__ == '__main__': ([[10, 10, 8, 11], [9, 8, 1, 1], [9, 7, 4, 10]], - 15)] + 15), + + # Rectangular with DISALLOWED + ([[4, 5, 6, DISALLOWED], + [1, 9, 12, 11], + [DISALLOWED, 5, 4, DISALLOWED], + [12, 12, 12, 10]], + 20), + + # DISALLOWED to force pairings + ([[1, DISALLOWED, DISALLOWED, DISALLOWED], + [DISALLOWED, 2, DISALLOWED, DISALLOWED], + [DISALLOWED, DISALLOWED, 3, DISALLOWED], + [DISALLOWED, DISALLOWED, DISALLOWED, 4]], + 10)] m = Munkres() for cost_matrix, expected_total in matrices: diff --git 
a/libs/mutagen/__init__.py b/libs/mutagen/__init__.py index c1abc0b1..94f2509c 100644 --- a/libs/mutagen/__init__.py +++ b/libs/mutagen/__init__.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. - +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Mutagen aims to be an all purpose multimedia tagging library. @@ -14,7 +13,7 @@ import mutagen.[format] metadata = mutagen.[format].Open(filename) -`metadata` acts like a dictionary of tags in the file. Tags are generally a +``metadata`` acts like a dictionary of tags in the file. Tags are generally a list of string-like values, but may have additional methods available depending on tag or format. They may also be entirely different objects for certain keys, again depending on format. @@ -24,7 +23,7 @@ from mutagen._util import MutagenError from mutagen._file import FileType, StreamInfo, File from mutagen._tags import Tags, Metadata, PaddingInfo -version = (1, 32) +version = (1, 41, 1) """Version tuple.""" version_string = ".".join(map(str, version)) diff --git a/libs/mutagen/_compat.py b/libs/mutagen/_compat.py index 77c465f1..8a60d68d 100644 --- a/libs/mutagen/_compat.py +++ b/libs/mutagen/_compat.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
import sys @@ -47,6 +47,9 @@ if PY2: return cls + import __builtin__ as builtins + builtins + elif PY3: from io import StringIO StringIO = StringIO @@ -84,3 +87,6 @@ elif PY3: def swap_to_string(cls): return cls + + import builtins + builtins diff --git a/libs/mutagen/_constants.py b/libs/mutagen/_constants.py index 62c1ce02..5c1c1a10 100644 --- a/libs/mutagen/_constants.py +++ b/libs/mutagen/_constants.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Constants used by Mutagen.""" diff --git a/libs/mutagen/_file.py b/libs/mutagen/_file.py index 95f400cf..2405a523 100644 --- a/libs/mutagen/_file.py +++ b/libs/mutagen/_file.py @@ -1,23 +1,26 @@ -# Copyright (C) 2005 Michael Urman # -*- coding: utf-8 -*- +# Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import warnings -from mutagen._util import DictMixin +from mutagen._util import DictMixin, loadfile from mutagen._compat import izip class FileType(DictMixin): - """An abstract object wrapping tags and audio stream information. + """FileType(filething, **kwargs) - Attributes: + Args: + filething (filething): A filename or a file-like object - * info -- :class:`StreamInfo` -- (length, bitrate, sample rate) - * tags -- :class:`Tags` -- metadata tags, if any + Subclasses might take further options via keyword arguments. + + An abstract object wrapping tags and audio stream information. 
Each file format has different potential tags and stream information. @@ -25,6 +28,10 @@ class FileType(DictMixin): FileTypes implement an interface very similar to Metadata; the dict interface, save, load, and delete calls on a FileType call the appropriate methods on its tag data. + + Attributes: + info (`StreamInfo`): contains length, bitrate, sample rate + tags (`Tags`): metadata tags, if any, otherwise `None` """ __module__ = "mutagen" @@ -34,14 +41,15 @@ class FileType(DictMixin): filename = None _mimes = ["application/octet-stream"] - def __init__(self, filename=None, *args, **kwargs): - if filename is None: + def __init__(self, *args, **kwargs): + if not args and not kwargs: warnings.warn("FileType constructor requires a filename", DeprecationWarning) else: - self.load(filename, *args, **kwargs) + self.load(*args, **kwargs) - def load(self, filename, *args, **kwargs): + @loadfile() + def load(self, filething, *args, **kwargs): raise NotImplementedError def __getitem__(self, key): @@ -88,11 +96,14 @@ class FileType(DictMixin): else: return self.tags.keys() - def delete(self, filename=None): - """Remove tags from a file. + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. In cases where the tagging format is independent of the file type - (for example `mutagen.ID3`) all traces of the tagging format will + (for example `mutagen.id3.ID3`) all traces of the tagging format will be removed. In cases where the tag is part of the file type, all tags and padding will be removed. @@ -101,36 +112,31 @@ class FileType(DictMixin): Does nothing if the file has no tags. - :raises mutagen.MutagenError: if deleting wasn't possible + Raises: + mutagen.MutagenError: if deleting wasn't possible """ if self.tags is not None: - if filename is None: - filename = self.filename - else: - warnings.warn( - "delete(filename=...) 
is deprecated, reload the file", - DeprecationWarning) - return self.tags.delete(filename) + return self.tags.delete(filething) - def save(self, filename=None, **kwargs): - """Save metadata tags. + @loadfile(writable=True) + def save(self, filething, **kwargs): + """save(filething=None, **kwargs) - :raises mutagen.MutagenError: if saving wasn't possible + Save metadata tags. + + Raises: + MutagenError: if saving wasn't possible """ - if filename is None: - filename = self.filename - else: - warnings.warn( - "save(filename=...) is deprecated, reload the file", - DeprecationWarning) - if self.tags is not None: - return self.tags.save(filename, **kwargs) + return self.tags.save(filething, **kwargs) def pprint(self): - """Print stream information and comment key=value pairs.""" + """ + Returns: + text: stream information and comment key=value pairs. + """ stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) try: @@ -143,15 +149,16 @@ class FileType(DictMixin): def add_tags(self): """Adds new tags to the file. - :raises mutagen.MutagenError: if tags already exist or adding is not - possible. + Raises: + mutagen.MutagenError: + if tags already exist or adding is not possible. """ raise NotImplementedError @property def mime(self): - """A list of mime types""" + """A list of mime types (:class:`mutagen.text`)""" mimes = [] for Kind in type(self).__mro__: @@ -162,6 +169,20 @@ class FileType(DictMixin): @staticmethod def score(filename, fileobj, header): + """Returns a score for how likely the file can be parsed by this type. + + Args: + filename (fspath): a file path + fileobj (fileobj): a file object open in rb mode. Position is + undefined + header (bytes): data of undefined length, starts with the start of + the file. + + Returns: + int: negative if definitely not a matching type, otherwise a score, + the bigger the more certain that the file can be loaded. 
+ """ + raise NotImplementedError @@ -176,13 +197,19 @@ class StreamInfo(object): __module__ = "mutagen" def pprint(self): - """Print stream information""" + """ + Returns: + text: Print stream information + """ raise NotImplementedError -def File(filename, options=None, easy=False): - """Guess the type of the file and try to open it. +@loadfile(method=False) +def File(filething, options=None, easy=False): + """File(filething, options=None, easy=False) + + Guess the type of the file and try to open it. The file type is decided by several things, such as the first 128 bytes (which usually contains a file type identifier), the @@ -190,12 +217,20 @@ def File(filename, options=None, easy=False): If no appropriate type could be found, None is returned. - :param options: Sequence of :class:`FileType` implementations, defaults to - all included ones. + Args: + filething (filething) + options: Sequence of :class:`FileType` implementations, + defaults to all included ones. + easy (bool): If the easy wrappers should be returnd if available. + For example :class:`EasyMP3 <mp3.EasyMP3>` instead of + :class:`MP3 <mp3.MP3>`. - :param easy: If the easy wrappers should be returnd if available. - For example :class:`EasyMP3 <mp3.EasyMP3>` instead - of :class:`MP3 <mp3.MP3>`. + Returns: + FileType: A FileType instance for the detected type or `None` in case + the type couln't be determined. + + Raises: + MutagenError: in case the detected type fails to load the file. 
""" if options is None: @@ -230,26 +265,36 @@ def File(filename, options=None, easy=False): from mutagen.aiff import AIFF from mutagen.aac import AAC from mutagen.smf import SMF + from mutagen.dsf import DSF options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, FLAC, AIFF, APEv2File, MP4, ID3FileType, WavPack, Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC, - SMF] + SMF, DSF] if not options: return None - with open(filename, "rb") as fileobj: + fileobj = filething.fileobj + + try: header = fileobj.read(128) - # Sort by name after score. Otherwise import order affects - # Kind sort order, which affects treatment of things with - # equals scores. - results = [(Kind.score(filename, fileobj, header), Kind.__name__) - for Kind in options] + except IOError: + header = b"" + + # Sort by name after score. Otherwise import order affects + # Kind sort order, which affects treatment of things with + # equals scores. + results = [(Kind.score(filething.name, fileobj, header), Kind.__name__) + for Kind in options] results = list(izip(results, options)) results.sort() (score, name), Kind = results[-1] if score > 0: - return Kind(filename) + try: + fileobj.seek(0, 0) + except IOError: + pass + return Kind(fileobj, filename=filething.filename) else: return None diff --git a/libs/mutagen/_senf/__init__.py b/libs/mutagen/_senf/__init__.py new file mode 100644 index 00000000..074c3200 --- /dev/null +++ b/libs/mutagen/_senf/__init__.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# 
The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os + +if os.name != "nt": + # make imports work + _winapi = object() + +from ._fsnative import fsnative, path2fsn, fsn2text, fsn2bytes, \ + bytes2fsn, uri2fsn, fsn2uri, text2fsn, fsn2norm +from ._print import print_, input_, supports_ansi_escape_codes +from ._stdlib import sep, pathsep, curdir, pardir, altsep, extsep, devnull, \ + defpath, getcwd, expanduser, expandvars +from ._argv import argv +from ._environ import environ, getenv, unsetenv, putenv +from ._temp import mkstemp, gettempdir, gettempprefix, mkdtemp + + +fsnative, print_, getcwd, getenv, unsetenv, putenv, environ, expandvars, \ + path2fsn, fsn2text, fsn2bytes, bytes2fsn, uri2fsn, fsn2uri, mkstemp, \ + gettempdir, gettempprefix, mkdtemp, input_, expanduser, text2fsn, \ + supports_ansi_escape_codes, fsn2norm + + +version = (1, 3, 4) +"""Tuple[`int`, `int`, `int`]: The version tuple (major, minor, micro)""" + + +version_string = ".".join(map(str, version)) +"""`str`: A version string""" + + +argv = argv +"""List[`fsnative`]: Like `sys.argv` but contains unicode under +Windows + Python 2 +""" + + +sep = sep +"""`fsnative`: Like `os.sep` but a `fsnative`""" + + +pathsep = pathsep +"""`fsnative`: Like `os.pathsep` but a `fsnative`""" + + +curdir = curdir +"""`fsnative`: Like `os.curdir` but a `fsnative`""" + + +pardir = pardir +"""`fsnative`: Like `os.pardir` but a fsnative""" + + +altsep = 
altsep +"""`fsnative` or `None`: Like `os.altsep` but a `fsnative` or `None`""" + + +extsep = extsep +"""`fsnative`: Like `os.extsep` but a `fsnative`""" + + +devnull = devnull +"""`fsnative`: Like `os.devnull` but a `fsnative`""" + + +defpath = defpath +"""`fsnative`: Like `os.defpath` but a `fsnative`""" + + +__all__ = [] diff --git a/libs/mutagen/_senf/_argv.py b/libs/mutagen/_senf/_argv.py new file mode 100644 index 00000000..c335b598 --- /dev/null +++ b/libs/mutagen/_senf/_argv.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import sys +import ctypes +import collections +from functools import total_ordering + +from ._compat import PY2, string_types +from ._fsnative import is_win, _fsn2legacy, path2fsn +from . 
import _winapi as winapi + + +def _get_win_argv(): + """Returns a unicode argv under Windows and standard sys.argv otherwise + + Returns: + List[`fsnative`] + """ + + assert is_win + + argc = ctypes.c_int() + try: + argv = winapi.CommandLineToArgvW( + winapi.GetCommandLineW(), ctypes.byref(argc)) + except WindowsError: + return [] + + if not argv: + return [] + + res = argv[max(0, argc.value - len(sys.argv)):argc.value] + + winapi.LocalFree(argv) + + return res + + +@total_ordering +class Argv(collections.MutableSequence): + """List[`fsnative`]: Like `sys.argv` but contains unicode + keys and values under Windows + Python 2. + + Any changes made will be forwarded to `sys.argv`. + """ + + def __init__(self): + if PY2 and is_win: + self._argv = _get_win_argv() + else: + self._argv = sys.argv + + def __getitem__(self, index): + return self._argv[index] + + def __setitem__(self, index, value): + if isinstance(value, string_types): + value = path2fsn(value) + + self._argv[index] = value + + if sys.argv is not self._argv: + try: + if isinstance(value, string_types): + sys.argv[index] = _fsn2legacy(value) + else: + sys.argv[index] = [_fsn2legacy(path2fsn(v)) for v in value] + except IndexError: + pass + + def __delitem__(self, index): + del self._argv[index] + try: + del sys.argv[index] + except IndexError: + pass + + def __eq__(self, other): + return self._argv == other + + def __lt__(self, other): + return self._argv < other + + def __len__(self): + return len(self._argv) + + def __repr__(self): + return repr(self._argv) + + def insert(self, index, value): + value = path2fsn(value) + self._argv.insert(index, value) + if sys.argv is not self._argv: + sys.argv.insert(index, _fsn2legacy(value)) + + +argv = Argv() diff --git a/libs/mutagen/_senf/_compat.py b/libs/mutagen/_senf/_compat.py new file mode 100644 index 00000000..bf1cb304 --- /dev/null +++ b/libs/mutagen/_senf/_compat.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission 
is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import sys + + +PY2 = sys.version_info[0] == 2 +PY3 = not PY2 + + +if PY2: + from urlparse import urlparse, urlunparse + urlparse, urlunparse + from urllib import quote, unquote + quote, unquote + + from StringIO import StringIO + BytesIO = StringIO + from io import StringIO as TextIO + TextIO + + string_types = (str, unicode) + text_type = unicode + + iteritems = lambda d: d.iteritems() +elif PY3: + from urllib.parse import urlparse, quote, unquote, urlunparse + urlparse, quote, unquote, urlunparse + + from io import StringIO + StringIO = StringIO + TextIO = StringIO + from io import BytesIO + BytesIO = BytesIO + + string_types = (str,) + text_type = str + + iteritems = lambda d: iter(d.items()) diff --git a/libs/mutagen/_senf/_environ.py b/libs/mutagen/_senf/_environ.py new file mode 100644 index 00000000..0d37d07d --- /dev/null +++ b/libs/mutagen/_senf/_environ.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import ctypes +import collections + +from ._compat import text_type, PY2 +from ._fsnative import path2fsn, is_win, _fsn2legacy, fsnative +from . import _winapi as winapi + + +def get_windows_env_var(key): + """Get an env var. + + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + buf = ctypes.create_unicode_buffer(32767) + + stored = winapi.GetEnvironmentVariableW(key, buf, 32767) + if stored == 0: + raise ctypes.WinError() + return buf[:stored] + + +def set_windows_env_var(key, value): + """Set an env var. + + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + if not isinstance(value, text_type): + raise TypeError("%r not of type %r" % (value, text_type)) + + status = winapi.SetEnvironmentVariableW(key, value) + if status == 0: + raise ctypes.WinError() + + +def del_windows_env_var(key): + """Delete an env var. + + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + status = winapi.SetEnvironmentVariableW(key, None) + if status == 0: + raise ctypes.WinError() + + +def read_windows_environ(): + """Returns a unicode dict of the Windows environment. 
+ + Raises: + WindowsEnvironError + """ + + res = winapi.GetEnvironmentStringsW() + if not res: + raise ctypes.WinError() + + res = ctypes.cast(res, ctypes.POINTER(ctypes.c_wchar)) + + done = [] + current = u"" + i = 0 + while 1: + c = res[i] + i += 1 + if c == u"\x00": + if not current: + break + done.append(current) + current = u"" + continue + current += c + + dict_ = {} + for entry in done: + try: + key, value = entry.split(u"=", 1) + except ValueError: + continue + key = _norm_key(key) + dict_[key] = value + + status = winapi.FreeEnvironmentStringsW(res) + if status == 0: + raise ctypes.WinError() + + return dict_ + + +def _norm_key(key): + assert isinstance(key, fsnative) + if is_win: + key = key.upper() + return key + + +class Environ(collections.MutableMapping): + """Dict[`fsnative`, `fsnative`]: Like `os.environ` but contains unicode + keys and values under Windows + Python 2. + + Any changes made will be forwarded to `os.environ`. + """ + + def __init__(self): + if is_win and PY2: + try: + env = read_windows_environ() + except WindowsError: + env = {} + else: + env = os.environ + self._env = env + + def __getitem__(self, key): + key = _norm_key(path2fsn(key)) + return self._env[key] + + def __setitem__(self, key, value): + key = _norm_key(path2fsn(key)) + value = path2fsn(value) + + if is_win and PY2: + # this calls putenv, so do it first and replace later + try: + os.environ[_fsn2legacy(key)] = _fsn2legacy(value) + except OSError: + raise ValueError + + try: + set_windows_env_var(key, value) + except WindowsError: + # py3+win fails for invalid keys. 
try to do the same + raise ValueError + try: + self._env[key] = value + except OSError: + raise ValueError + + def __delitem__(self, key): + key = _norm_key(path2fsn(key)) + + if is_win and PY2: + try: + del_windows_env_var(key) + except WindowsError: + pass + + try: + del os.environ[_fsn2legacy(key)] + except KeyError: + pass + + del self._env[key] + + def __iter__(self): + return iter(self._env) + + def __len__(self): + return len(self._env) + + def __repr__(self): + return repr(self._env) + + def copy(self): + return self._env.copy() + + +environ = Environ() + + +def getenv(key, value=None): + """Like `os.getenv` but returns unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to get + value (object): The value to return if the env var does not exist + Returns: + `fsnative` or `object`: + The env var or the passed value if it doesn't exist + """ + + key = path2fsn(key) + if is_win and PY2: + return environ.get(key, value) + return os.getenv(key, value) + + +def unsetenv(key): + """Like `os.unsetenv` but takes unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to unset + """ + + key = path2fsn(key) + if is_win: + # python 3 has no unsetenv under Windows -> use our ctypes one as well + try: + del_windows_env_var(key) + except WindowsError: + pass + else: + os.unsetenv(key) + + +def putenv(key, value): + """Like `os.putenv` but takes unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to get + value (pathlike): The value to set + Raises: + ValueError + """ + + key = path2fsn(key) + value = path2fsn(value) + + if is_win and PY2: + try: + set_windows_env_var(key, value) + except WindowsError: + # py3 + win fails here + raise ValueError + else: + try: + os.putenv(key, value) + except OSError: + # win + py3 raise here for invalid keys which is probably a bug. 
+ # ValueError seems better + raise ValueError diff --git a/libs/mutagen/_senf/_fsnative.py b/libs/mutagen/_senf/_fsnative.py new file mode 100644 index 00000000..a1e5967c --- /dev/null +++ b/libs/mutagen/_senf/_fsnative.py @@ -0,0 +1,666 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import sys +import ctypes +import codecs + +from . 
import _winapi as winapi +from ._compat import text_type, PY3, PY2, urlparse, quote, unquote, urlunparse + + +is_win = os.name == "nt" +is_unix = not is_win +is_darwin = sys.platform == "darwin" + +_surrogatepass = "strict" if PY2 else "surrogatepass" + + +def _normalize_codec(codec, _cache={}): + """Raises LookupError""" + + try: + return _cache[codec] + except KeyError: + _cache[codec] = codecs.lookup(codec).name + return _cache[codec] + + +def _swap_bytes(data): + """swaps bytes for 16 bit, leaves remaining trailing bytes alone""" + + a, b = data[1::2], data[::2] + data = bytearray().join(bytearray(x) for x in zip(a, b)) + if len(b) > len(a): + data += b[-1:] + return bytes(data) + + +def _codec_fails_on_encode_surrogates(codec, _cache={}): + """Returns if a codec fails correctly when passing in surrogates with + a surrogatepass/surrogateescape error handler. Some codecs were broken + in Python <3.4 + """ + + try: + return _cache[codec] + except KeyError: + try: + u"\uD800\uDC01".encode(codec) + except UnicodeEncodeError: + _cache[codec] = True + else: + _cache[codec] = False + return _cache[codec] + + +def _codec_can_decode_with_surrogatepass(codec, _cache={}): + """Returns if a codec supports the surrogatepass error handler when + decoding. 
Some codecs were broken in Python <3.4 + """ + + try: + return _cache[codec] + except KeyError: + try: + u"\ud83d".encode( + codec, _surrogatepass).decode(codec, _surrogatepass) + except UnicodeDecodeError: + _cache[codec] = False + else: + _cache[codec] = True + return _cache[codec] + + +def _decode_surrogatepass(data, codec): + """Like data.decode(codec, 'surrogatepass') but makes utf-16-le/be work + on Python < 3.4 + Windows + + https://bugs.python.org/issue27971 + + Raises UnicodeDecodeError, LookupError + """ + + try: + return data.decode(codec, _surrogatepass) + except UnicodeDecodeError: + if not _codec_can_decode_with_surrogatepass(codec): + if _normalize_codec(codec) == "utf-16-be": + data = _swap_bytes(data) + codec = "utf-16-le" + if _normalize_codec(codec) == "utf-16-le": + buffer_ = ctypes.create_string_buffer(data + b"\x00\x00") + value = ctypes.wstring_at(buffer_, len(data) // 2) + if value.encode("utf-16-le", _surrogatepass) != data: + raise + return value + else: + raise + else: + raise + + +def _winpath2bytes_py3(text, codec): + """Fallback implementation for text including surrogates""" + + # merge surrogate codepoints + if _normalize_codec(codec).startswith("utf-16"): + # fast path, utf-16 merges anyway + return text.encode(codec, _surrogatepass) + return _decode_surrogatepass( + text.encode("utf-16-le", _surrogatepass), + "utf-16-le").encode(codec, _surrogatepass) + + +if PY2: + def _winpath2bytes(text, codec): + return text.encode(codec) +else: + def _winpath2bytes(text, codec): + if _codec_fails_on_encode_surrogates(codec): + try: + return text.encode(codec) + except UnicodeEncodeError: + return _winpath2bytes_py3(text, codec) + else: + return _winpath2bytes_py3(text, codec) + + +def fsn2norm(path): + """ + Args: + path (fsnative): The path to normalize + Returns: + `fsnative` + + Normalizes an fsnative path. + + The same underlying path can have multiple representations as fsnative + (due to surrogate pairs and variable length encodings). 
When concatenating + fsnative the result might be different than concatenating the serialized + form and then deserializing it. + + This returns the normalized form i.e. the form which os.listdir() would + return. This is useful when you alter fsnative but require that the same + underlying path always maps to the same fsnative value. + + All functions like :func:`bytes2fsn`, :func:`fsnative`, :func:`text2fsn` + and :func:`path2fsn` always return a normalized path, independent of their + input. + """ + + native = _fsn2native(path) + + if is_win: + return _decode_surrogatepass( + native.encode("utf-16-le", _surrogatepass), + "utf-16-le") + elif PY3: + return bytes2fsn(native, None) + else: + return path + + +def _fsn2legacy(path): + """Takes a fsnative path and returns a path that can be put into os.environ + or sys.argv. Might result in a mangled path on Python2 + Windows. + Can't fail. + + Args: + path (fsnative) + Returns: + str + """ + + if PY2 and is_win: + return path.encode(_encoding, "replace") + return path + + +def _fsnative(text): + if not isinstance(text, text_type): + raise TypeError("%r needs to be a text type (%r)" % (text, text_type)) + + if is_unix: + # First we go to bytes so we can be sure we have a valid source. + # Theoretically we should fail here in case we have a non-unicode + # encoding. But this would make everything complicated and there is + # no good way to handle a failure from the user side. 
Instead + # fall back to utf-8 which is the most likely the right choice in + # a mis-configured environment + encoding = _encoding + try: + path = text.encode(encoding, _surrogatepass) + except UnicodeEncodeError: + path = text.encode("utf-8", _surrogatepass) + + if b"\x00" in path: + path = path.replace(b"\x00", fsn2bytes(_fsnative(u"\uFFFD"), None)) + + if PY3: + return path.decode(_encoding, "surrogateescape") + return path + else: + if u"\x00" in text: + text = text.replace(u"\x00", u"\uFFFD") + text = fsn2norm(text) + return text + + +def _create_fsnative(type_): + # a bit of magic to make fsnative(u"foo") and isinstance(path, fsnative) + # work + + class meta(type): + + def __instancecheck__(self, instance): + return _typecheck_fsnative(instance) + + def __subclasscheck__(self, subclass): + return issubclass(subclass, type_) + + class impl(object): + """fsnative(text=u"") + + Args: + text (text): The text to convert to a path + Returns: + fsnative: The new path. + Raises: + TypeError: In case something other then `text` has been passed + + This type is a virtual base class for the real path type. + Instantiating it returns an instance of the real path type and it + overrides instance and subclass checks so that `isinstance` and + `issubclass` checks work: + + :: + + isinstance(fsnative(u"foo"), fsnative) == True + issubclass(type(fsnative(u"foo")), fsnative) == True + + The real returned type is: + + - **Python 2 + Windows:** :obj:`python:unicode`, with ``surrogates``, + without ``null`` + - **Python 2 + Unix:** :obj:`python:str`, without ``null`` + - **Python 3 + Windows:** :obj:`python3:str`, with ``surrogates``, + without ``null`` + - **Python 3 + Unix:** :obj:`python3:str`, with ``surrogates``, without + ``null``, without code points not encodable with the locale encoding + + Constructing a `fsnative` can't fail. + + Passing a `fsnative` to :func:`open` will never lead to `ValueError` + or `TypeError`. 
+ + Any operation on `fsnative` can also use the `str` type, as long as + the `str` only contains ASCII and no NULL. + """ + + def __new__(cls, text=u""): + return _fsnative(text) + + new_type = meta("fsnative", (object,), dict(impl.__dict__)) + new_type.__module__ = "senf" + return new_type + + +fsnative_type = text_type if is_win or PY3 else bytes +fsnative = _create_fsnative(fsnative_type) + + +def _typecheck_fsnative(path): + """ + Args: + path (object) + Returns: + bool: if path is a fsnative + """ + + if not isinstance(path, fsnative_type): + return False + + if PY3 or is_win: + if u"\x00" in path: + return False + + if is_unix: + try: + path.encode(_encoding, "surrogateescape") + except UnicodeEncodeError: + return False + elif b"\x00" in path: + return False + + return True + + +def _fsn2native(path): + """ + Args: + path (fsnative) + Returns: + `text` on Windows, `bytes` on Unix + Raises: + TypeError: in case the type is wrong or the ´str` on Py3 + Unix + can't be converted to `bytes` + + This helper allows to validate the type and content of a path. + To reduce overhead the encoded value for Py3 + Unix is returned so + it can be reused. + """ + + if not isinstance(path, fsnative_type): + raise TypeError("path needs to be %s, not %s" % ( + fsnative_type.__name__, type(path).__name__)) + + if is_unix: + if PY3: + try: + path = path.encode(_encoding, "surrogateescape") + except UnicodeEncodeError: + # This look more like ValueError, but raising only one error + # makes things simpler... also one could say str + surrogates + # is its own type + raise TypeError( + "path contained Unicode code points not valid in" + "the current path encoding. 
To create a valid " + "path from Unicode use text2fsn()") + + if b"\x00" in path: + raise TypeError("fsnative can't contain nulls") + else: + if u"\x00" in path: + raise TypeError("fsnative can't contain nulls") + + return path + + +def _get_encoding(): + """The encoding used for paths, argv, environ, stdout and stdin""" + + encoding = sys.getfilesystemencoding() + if encoding is None: + if is_darwin: + encoding = "utf-8" + elif is_win: + encoding = "mbcs" + else: + encoding = "ascii" + encoding = _normalize_codec(encoding) + return encoding + + +_encoding = _get_encoding() + + +def path2fsn(path): + """ + Args: + path (pathlike): The path to convert + Returns: + `fsnative` + Raises: + TypeError: In case the type can't be converted to a `fsnative` + ValueError: In case conversion fails + + Returns a `fsnative` path for a `pathlike`. + """ + + # allow mbcs str on py2+win and bytes on py3 + if PY2: + if is_win: + if isinstance(path, bytes): + path = path.decode(_encoding) + else: + if isinstance(path, text_type): + path = path.encode(_encoding) + if "\x00" in path: + raise ValueError("embedded null") + else: + path = getattr(os, "fspath", lambda x: x)(path) + if isinstance(path, bytes): + if b"\x00" in path: + raise ValueError("embedded null") + path = path.decode(_encoding, "surrogateescape") + elif is_unix and isinstance(path, str): + # make sure we can encode it and this is not just some random + # unicode string + data = path.encode(_encoding, "surrogateescape") + if b"\x00" in data: + raise ValueError("embedded null") + path = fsn2norm(path) + else: + if u"\x00" in path: + raise ValueError("embedded null") + path = fsn2norm(path) + + if not isinstance(path, fsnative_type): + raise TypeError("path needs to be %s", fsnative_type.__name__) + + return path + + +def fsn2text(path, strict=False): + """ + Args: + path (fsnative): The path to convert + strict (bool): Fail in case the conversion is not reversible + Returns: + `text` + Raises: + TypeError: In case no 
`fsnative` has been passed + ValueError: In case ``strict`` was True and the conversion failed + + Converts a `fsnative` path to `text`. + + Can be used to pass a path to some unicode API, like for example a GUI + toolkit. + + If ``strict`` is True the conversion will fail in case it is not + reversible. This can be useful for converting program arguments that are + supposed to be text and erroring out in case they are not. + + Encoding with a Unicode encoding will always succeed with the result. + """ + + path = _fsn2native(path) + + errors = "strict" if strict else "replace" + + if is_win: + return path.encode("utf-16-le", _surrogatepass).decode("utf-16-le", + errors) + else: + return path.decode(_encoding, errors) + + +def text2fsn(text): + """ + Args: + text (text): The text to convert + Returns: + `fsnative` + Raises: + TypeError: In case no `text` has been passed + + Takes `text` and converts it to a `fsnative`. + + This operation is not reversible and can't fail. + """ + + return fsnative(text) + + +def fsn2bytes(path, encoding="utf-8"): + """ + Args: + path (fsnative): The path to convert + encoding (`str`): encoding used for Windows + Returns: + `bytes` + Raises: + TypeError: If no `fsnative` path is passed + ValueError: If encoding fails or the encoding is invalid + + Converts a `fsnative` path to `bytes`. + + The passed *encoding* is only used on platforms where paths are not + associated with an encoding (Windows for example). + + For Windows paths, lone surrogates will be encoded like normal code points + and surrogate pairs will be merged before encoding. In case of ``utf-8`` + or ``utf-16-le`` this is equal to the `WTF-8 and WTF-16 encoding + <https://simonsapin.github.io/wtf-8/>`__. 
+ """ + + path = _fsn2native(path) + + if is_win: + if encoding is None: + raise ValueError("invalid encoding %r" % encoding) + + try: + return _winpath2bytes(path, encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + else: + return path + + +def bytes2fsn(data, encoding="utf-8"): + """ + Args: + data (bytes): The data to convert + encoding (`str`): encoding used for Windows + Returns: + `fsnative` + Raises: + TypeError: If no `bytes` path is passed + ValueError: If decoding fails or the encoding is invalid + + Turns `bytes` to a `fsnative` path. + + The passed *encoding* is only used on platforms where paths are not + associated with an encoding (Windows for example). + + For Windows paths ``WTF-8`` is accepted if ``utf-8`` is used and + ``WTF-16`` accepted if ``utf-16-le`` is used. + """ + + if not isinstance(data, bytes): + raise TypeError("data needs to be bytes") + + if is_win: + if encoding is None: + raise ValueError("invalid encoding %r" % encoding) + try: + path = _decode_surrogatepass(data, encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + if u"\x00" in path: + raise ValueError("contains nulls") + return path + else: + if b"\x00" in data: + raise ValueError("contains nulls") + if PY2: + return data + else: + return data.decode(_encoding, "surrogateescape") + + +def uri2fsn(uri): + """ + Args: + uri (`text` or :obj:`python:str`): A file URI + Returns: + `fsnative` + Raises: + TypeError: In case an invalid type is passed + ValueError: In case the URI isn't a valid file URI + + Takes a file URI and returns a `fsnative` path + """ + + if PY2: + if isinstance(uri, text_type): + uri = uri.encode("utf-8") + if not isinstance(uri, bytes): + raise TypeError("uri needs to be ascii str or unicode") + else: + if not isinstance(uri, str): + raise TypeError("uri needs to be str") + + parsed = urlparse(uri) + scheme = parsed.scheme + netloc = parsed.netloc + path = parsed.path + + if scheme != 
"file": + raise ValueError("Not a file URI: %r" % uri) + + if not path: + raise ValueError("Invalid file URI: %r" % uri) + + uri = urlunparse(parsed)[7:] + + if is_win: + try: + drive, rest = uri.split(":", 1) + except ValueError: + path = "" + rest = uri.replace("/", "\\") + else: + path = drive[-1] + ":" + rest = rest.replace("/", "\\") + if PY2: + path += unquote(rest) + else: + path += unquote(rest, encoding="utf-8", errors="surrogatepass") + if netloc: + path = "\\\\" + path + if PY2: + path = path.decode("utf-8") + if u"\x00" in path: + raise ValueError("embedded null") + return path + else: + if PY2: + path = unquote(uri) + else: + path = unquote(uri, encoding=_encoding, errors="surrogateescape") + if "\x00" in path: + raise ValueError("embedded null") + return path + + +def fsn2uri(path): + """ + Args: + path (fsnative): The path to convert to an URI + Returns: + `text`: An ASCII only URI + Raises: + TypeError: If no `fsnative` was passed + ValueError: If the path can't be converted + + Takes a `fsnative` path and returns a file URI. + + On Windows non-ASCII characters will be encoded using utf-8 and then + percent encoded. + """ + + path = _fsn2native(path) + + def _quote_path(path): + # RFC 2396 + path = quote(path, "/:@&=+$,") + if PY2: + path = path.decode("ascii") + return path + + if is_win: + buf = ctypes.create_unicode_buffer(winapi.INTERNET_MAX_URL_LENGTH) + length = winapi.DWORD(winapi.INTERNET_MAX_URL_LENGTH) + flags = 0 + try: + winapi.UrlCreateFromPathW(path, buf, ctypes.byref(length), flags) + except WindowsError as e: + raise ValueError(e) + uri = buf[:length.value] + + # For some reason UrlCreateFromPathW escapes some chars outside of + # ASCII and some not. Unquote and re-quote with utf-8. 
+ if PY3: + # latin-1 maps code points directly to bytes, which is what we want + uri = unquote(uri, "latin-1") + else: + # Python 2 does what we want by default + uri = unquote(uri) + + return _quote_path(uri.encode("utf-8", _surrogatepass)) + + else: + return u"file://" + _quote_path(path) diff --git a/libs/mutagen/_senf/_print.py b/libs/mutagen/_senf/_print.py new file mode 100644 index 00000000..63c50fa5 --- /dev/null +++ b/libs/mutagen/_senf/_print.py @@ -0,0 +1,424 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import sys +import os +import ctypes +import re + +from ._fsnative import _encoding, is_win, is_unix, _surrogatepass, bytes2fsn +from ._compat import text_type, PY2, PY3 +from ._winansi import AnsiState, ansi_split +from . 
import _winapi as winapi + + +def print_(*objects, **kwargs): + """print_(*objects, sep=None, end=None, file=None, flush=False) + + Args: + objects (object): zero or more objects to print + sep (str): Object separator to use, defaults to ``" "`` + end (str): Trailing string to use, defaults to ``"\\n"``. + If end is ``"\\n"`` then `os.linesep` is used. + file (object): A file-like object, defaults to `sys.stdout` + flush (bool): If the file stream should be flushed + Raises: + EnvironmentError + + Like print(), but: + + * Supports printing filenames under Unix + Python 3 and Windows + Python 2 + * Emulates ANSI escape sequence support under Windows + * Never fails due to encoding/decoding errors. Tries hard to get everything + on screen as is, but will fall back to "?" if all fails. + + This does not conflict with ``colorama``, but will not use it on Windows. + """ + + sep = kwargs.get("sep") + sep = sep if sep is not None else " " + end = kwargs.get("end") + end = end if end is not None else "\n" + file = kwargs.get("file") + file = file if file is not None else sys.stdout + flush = bool(kwargs.get("flush", False)) + + if is_win: + _print_windows(objects, sep, end, file, flush) + else: + _print_unix(objects, sep, end, file, flush) + + +def _print_unix(objects, sep, end, file, flush): + """A print_() implementation which writes bytes""" + + encoding = _encoding + + if isinstance(sep, text_type): + sep = sep.encode(encoding, "replace") + if not isinstance(sep, bytes): + raise TypeError + + if isinstance(end, text_type): + end = end.encode(encoding, "replace") + if not isinstance(end, bytes): + raise TypeError + + if end == b"\n": + end = os.linesep + if PY3: + end = end.encode("ascii") + + parts = [] + for obj in objects: + if not isinstance(obj, text_type) and not isinstance(obj, bytes): + obj = text_type(obj) + if isinstance(obj, text_type): + if PY2: + obj = obj.encode(encoding, "replace") + else: + try: + obj = obj.encode(encoding, "surrogateescape") + except 
UnicodeEncodeError: + obj = obj.encode(encoding, "replace") + assert isinstance(obj, bytes) + parts.append(obj) + + data = sep.join(parts) + end + assert isinstance(data, bytes) + + file = getattr(file, "buffer", file) + + try: + file.write(data) + except TypeError: + if PY3: + # For StringIO, first try with surrogates + surr_data = data.decode(encoding, "surrogateescape") + try: + file.write(surr_data) + except (TypeError, ValueError): + file.write(data.decode(encoding, "replace")) + else: + # for file like objects which don't support bytes + file.write(data.decode(encoding, "replace")) + + if flush: + file.flush() + + +ansi_state = AnsiState() + + +def _print_windows(objects, sep, end, file, flush): + """The windows implementation of print_()""" + + h = winapi.INVALID_HANDLE_VALUE + + try: + fileno = file.fileno() + except (EnvironmentError, AttributeError): + pass + else: + if fileno == 1: + h = winapi.GetStdHandle(winapi.STD_OUTPUT_HANDLE) + elif fileno == 2: + h = winapi.GetStdHandle(winapi.STD_ERROR_HANDLE) + + encoding = _encoding + + parts = [] + for obj in objects: + if isinstance(obj, bytes): + obj = obj.decode(encoding, "replace") + if not isinstance(obj, text_type): + obj = text_type(obj) + parts.append(obj) + + if isinstance(sep, bytes): + sep = sep.decode(encoding, "replace") + if not isinstance(sep, text_type): + raise TypeError + + if isinstance(end, bytes): + end = end.decode(encoding, "replace") + if not isinstance(end, text_type): + raise TypeError + + if end == u"\n": + end = os.linesep + + text = sep.join(parts) + end + assert isinstance(text, text_type) + + is_console = True + if h == winapi.INVALID_HANDLE_VALUE: + is_console = False + else: + # get the default value + info = winapi.CONSOLE_SCREEN_BUFFER_INFO() + if not winapi.GetConsoleScreenBufferInfo(h, ctypes.byref(info)): + is_console = False + + if is_console: + # make sure we flush before we apply any console attributes + file.flush() + + # try to force a utf-8 code page, use the output 
CP if that fails + cp = winapi.GetConsoleOutputCP() + try: + encoding = "utf-8" + if winapi.SetConsoleOutputCP(65001) == 0: + encoding = None + + for is_ansi, part in ansi_split(text): + if is_ansi: + ansi_state.apply(h, part) + else: + if encoding is not None: + data = part.encode(encoding, _surrogatepass) + else: + data = _encode_codepage(cp, part) + os.write(fileno, data) + finally: + # reset the code page to what we had before + winapi.SetConsoleOutputCP(cp) + else: + # try writing bytes first, so in case of Python 2 StringIO we get + # the same type on all platforms + try: + file.write(text.encode("utf-8", _surrogatepass)) + except (TypeError, ValueError): + file.write(text) + + if flush: + file.flush() + + +def _readline_windows(): + """Raises OSError""" + + try: + fileno = sys.stdin.fileno() + except (EnvironmentError, AttributeError): + fileno = -1 + + # In case stdin is replaced, read from that + if fileno != 0: + return _readline_windows_fallback() + + h = winapi.GetStdHandle(winapi.STD_INPUT_HANDLE) + if h == winapi.INVALID_HANDLE_VALUE: + return _readline_windows_fallback() + + buf_size = 1024 + buf = ctypes.create_string_buffer(buf_size * ctypes.sizeof(winapi.WCHAR)) + read = winapi.DWORD() + + text = u"" + while True: + if winapi.ReadConsoleW( + h, buf, buf_size, ctypes.byref(read), None) == 0: + if not text: + return _readline_windows_fallback() + raise ctypes.WinError() + data = buf[:read.value * ctypes.sizeof(winapi.WCHAR)] + text += data.decode("utf-16-le", _surrogatepass) + if text.endswith(u"\r\n"): + return text[:-2] + + +def _decode_codepage(codepage, data): + """ + Args: + codepage (int) + data (bytes) + Returns: + `text` + + Decodes data using the given codepage. If some data can't be decoded + using the codepage it will not fail. 
+ """ + + assert isinstance(data, bytes) + + if not data: + return u"" + + # get the required buffer length first + length = winapi.MultiByteToWideChar(codepage, 0, data, len(data), None, 0) + if length == 0: + raise ctypes.WinError() + + # now decode + buf = ctypes.create_unicode_buffer(length) + length = winapi.MultiByteToWideChar( + codepage, 0, data, len(data), buf, length) + if length == 0: + raise ctypes.WinError() + + return buf[:] + + +def _encode_codepage(codepage, text): + """ + Args: + codepage (int) + text (text) + Returns: + `bytes` + + Encode text using the given code page. Will not fail if a char + can't be encoded using that codepage. + """ + + assert isinstance(text, text_type) + + if not text: + return b"" + + size = (len(text.encode("utf-16-le", _surrogatepass)) // + ctypes.sizeof(winapi.WCHAR)) + + # get the required buffer size + length = winapi.WideCharToMultiByte( + codepage, 0, text, size, None, 0, None, None) + if length == 0: + raise ctypes.WinError() + + # decode to the buffer + buf = ctypes.create_string_buffer(length) + length = winapi.WideCharToMultiByte( + codepage, 0, text, size, buf, length, None, None) + if length == 0: + raise ctypes.WinError() + return buf[:length] + + +def _readline_windows_fallback(): + # In case reading from the console failed (maybe we get piped data) + # we assume the input was generated according to the output encoding. + # Got any better ideas? 
+ assert is_win + cp = winapi.GetConsoleOutputCP() + data = getattr(sys.stdin, "buffer", sys.stdin).readline().rstrip(b"\r\n") + return _decode_codepage(cp, data) + + +def _readline_default(): + assert is_unix + data = getattr(sys.stdin, "buffer", sys.stdin).readline().rstrip(b"\r\n") + if PY3: + return data.decode(_encoding, "surrogateescape") + else: + return data + + +def _readline(): + if is_win: + return _readline_windows() + else: + return _readline_default() + + +def input_(prompt=None): + """ + Args: + prompt (object): Prints the passed object to stdout without + adding a trailing newline + Returns: + `fsnative` + Raises: + EnvironmentError + + Like :func:`python3:input` but returns a `fsnative` and allows printing + filenames as prompt to stdout. + + Use :func:`fsn2text` on the result if you just want to deal with text. + """ + + if prompt is not None: + print_(prompt, end="") + + return _readline() + + +def _get_file_name_for_handle(handle): + """(Windows only) Returns a file name for a file handle. + + Args: + handle (winapi.HANDLE) + Returns: + `text` or `None` if no file name could be retrieved. + """ + + assert is_win + assert handle != winapi.INVALID_HANDLE_VALUE + + size = winapi.FILE_NAME_INFO.FileName.offset + \ + winapi.MAX_PATH * ctypes.sizeof(winapi.WCHAR) + buf = ctypes.create_string_buffer(size) + + if winapi.GetFileInformationByHandleEx is None: + # Windows XP + return None + + status = winapi.GetFileInformationByHandleEx( + handle, winapi.FileNameInfo, buf, size) + if status == 0: + return None + + name_info = ctypes.cast( + buf, ctypes.POINTER(winapi.FILE_NAME_INFO)).contents + offset = winapi.FILE_NAME_INFO.FileName.offset + data = buf[offset:offset + name_info.FileNameLength] + return bytes2fsn(data, "utf-16-le") + + +def supports_ansi_escape_codes(fd): + """Returns whether the output device is capable of interpreting ANSI escape + codes when :func:`print_` is used. + + Args: + fd (int): file descriptor (e.g. 
``sys.stdout.fileno()``) + Returns: + `bool` + """ + + if os.isatty(fd): + return True + + if not is_win: + return False + + # Check for cygwin/msys terminal + handle = winapi._get_osfhandle(fd) + if handle == winapi.INVALID_HANDLE_VALUE: + return False + + if winapi.GetFileType(handle) != winapi.FILE_TYPE_PIPE: + return False + + file_name = _get_file_name_for_handle(handle) + match = re.match( + "^\\\\(cygwin|msys)-[a-z0-9]+-pty[0-9]+-(from|to)-master$", file_name) + return match is not None diff --git a/libs/mutagen/_senf/_stdlib.py b/libs/mutagen/_senf/_stdlib.py new file mode 100644 index 00000000..f3193d33 --- /dev/null +++ b/libs/mutagen/_senf/_stdlib.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import re +import os + +from ._fsnative import path2fsn, fsnative, is_win +from ._compat import PY2 +from ._environ import environ + + +sep = path2fsn(os.sep) +pathsep = path2fsn(os.pathsep) +curdir = path2fsn(os.curdir) +pardir = path2fsn(os.pardir) +altsep = path2fsn(os.altsep) if os.altsep is not None else None +extsep = path2fsn(os.extsep) +devnull = path2fsn(os.devnull) +defpath = path2fsn(os.defpath) + + +def getcwd(): + """Like `os.getcwd` but returns a `fsnative` path + + Returns: + `fsnative` + """ + + if is_win and PY2: + return os.getcwdu() + return os.getcwd() + + +def _get_userdir(user=None): + """Returns the user dir or None""" + + if user is not None and not isinstance(user, fsnative): + raise TypeError + + if is_win: + if "HOME" in environ: + path = environ["HOME"] + elif "USERPROFILE" in environ: + path = environ["USERPROFILE"] + elif "HOMEPATH" in environ and "HOMEDRIVE" in environ: + path = os.path.join(environ["HOMEDRIVE"], environ["HOMEPATH"]) + else: + return + + if user is None: + return path + else: + return os.path.join(os.path.dirname(path), user) + else: + import pwd + + if user is None: + if "HOME" in environ: + return environ["HOME"] + else: + try: + return path2fsn(pwd.getpwuid(os.getuid()).pw_dir) + except KeyError: + return + else: + try: + return path2fsn(pwd.getpwnam(user).pw_dir) + except KeyError: + return + + +def expanduser(path): + """ + Args: + path (pathlike): A path to expand + Returns: + `fsnative` + + Like :func:`python:os.path.expanduser` but supports unicode home + directories under Windows + Python 2 and always returns a `fsnative`. 
+ """ + + path = path2fsn(path) + + if path == "~": + return _get_userdir() + elif path.startswith("~" + sep) or ( + altsep is not None and path.startswith("~" + altsep)): + userdir = _get_userdir() + if userdir is None: + return path + return userdir + path[1:] + elif path.startswith("~"): + sep_index = path.find(sep) + if altsep is not None: + alt_index = path.find(altsep) + if alt_index != -1 and alt_index < sep_index: + sep_index = alt_index + + if sep_index == -1: + user = path[1:] + rest = "" + else: + user = path[1:sep_index] + rest = path[sep_index:] + + userdir = _get_userdir(user) + if userdir is not None: + return userdir + rest + else: + return path + else: + return path + + +def expandvars(path): + """ + Args: + path (pathlike): A path to expand + Returns: + `fsnative` + + Like :func:`python:os.path.expandvars` but supports unicode under Windows + + Python 2 and always returns a `fsnative`. + """ + + path = path2fsn(path) + + def repl_func(match): + return environ.get(match.group(1), match.group(0)) + + path = re.compile(r"\$(\w+)", flags=re.UNICODE).sub(repl_func, path) + if os.name == "nt": + path = re.sub(r"%([^%]+)%", repl_func, path) + return re.sub(r"\$\{([^\}]+)\}", repl_func, path) diff --git a/libs/mutagen/_senf/_temp.py b/libs/mutagen/_senf/_temp.py new file mode 100644 index 00000000..d29b7217 --- /dev/null +++ b/libs/mutagen/_senf/_temp.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included 
+# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import tempfile + +from ._fsnative import path2fsn, fsnative + + +def gettempdir(): + """ + Returns: + `fsnative` + + Like :func:`python3:tempfile.gettempdir`, but always returns a `fsnative` + path + """ + + # FIXME: I don't want to reimplement all that logic, reading env vars etc. + # At least for the default it works. + return path2fsn(tempfile.gettempdir()) + + +def gettempprefix(): + """ + Returns: + `fsnative` + + Like :func:`python3:tempfile.gettempprefix`, but always returns a + `fsnative` path + """ + + return path2fsn(tempfile.gettempprefix()) + + +def mkstemp(suffix=None, prefix=None, dir=None, text=False): + """ + Args: + suffix (`pathlike` or `None`): suffix or `None` to use the default + prefix (`pathlike` or `None`): prefix or `None` to use the default + dir (`pathlike` or `None`): temp dir or `None` to use the default + text (bool): if the file should be opened in text mode + Returns: + Tuple[`int`, `fsnative`]: + A tuple containing the file descriptor and the file path + Raises: + EnvironmentError + + Like :func:`python3:tempfile.mkstemp` but always returns a `fsnative` + path. 
+ """ + + suffix = fsnative() if suffix is None else path2fsn(suffix) + prefix = gettempprefix() if prefix is None else path2fsn(prefix) + dir = gettempdir() if dir is None else path2fsn(dir) + + return tempfile.mkstemp(suffix, prefix, dir, text) + + +def mkdtemp(suffix=None, prefix=None, dir=None): + """ + Args: + suffix (`pathlike` or `None`): suffix or `None` to use the default + prefix (`pathlike` or `None`): prefix or `None` to use the default + dir (`pathlike` or `None`): temp dir or `None` to use the default + Returns: + `fsnative`: A path to a directory + Raises: + EnvironmentError + + Like :func:`python3:tempfile.mkstemp` but always returns a `fsnative` path. + """ + + suffix = fsnative() if suffix is None else path2fsn(suffix) + prefix = gettempprefix() if prefix is None else path2fsn(prefix) + dir = gettempdir() if dir is None else path2fsn(dir) + + return tempfile.mkdtemp(suffix, prefix, dir) diff --git a/libs/mutagen/_senf/_winansi.py b/libs/mutagen/_senf/_winansi.py new file mode 100644 index 00000000..fbbc1c22 --- /dev/null +++ b/libs/mutagen/_senf/_winansi.py @@ -0,0 +1,319 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import ctypes +import re +import atexit + +from . import _winapi as winapi + + +def ansi_parse(code): + """Returns command, (args)""" + + return code[-1:], tuple([int(v or "0") for v in code[2:-1].split(";")]) + + +def ansi_split(text, _re=re.compile(u"(\x1b\\[(\\d*;?)*\\S)")): + """Yields (is_ansi, text)""" + + for part in _re.split(text): + if part: + yield (bool(_re.match(part)), part) + + +class AnsiCommand(object): + TEXT = "m" + + MOVE_UP = "A" + MOVE_DOWN = "B" + MOVE_FORWARD = "C" + MOVE_BACKWARD = "D" + + SET_POS = "H" + SET_POS_ALT = "f" + + SAVE_POS = "s" + RESTORE_POS = "u" + + +class TextAction(object): + RESET_ALL = 0 + + SET_BOLD = 1 + SET_DIM = 2 + SET_ITALIC = 3 + SET_UNDERLINE = 4 + SET_BLINK = 5 + SET_BLINK_FAST = 6 + SET_REVERSE = 7 + SET_HIDDEN = 8 + + RESET_BOLD = 21 + RESET_DIM = 22 + RESET_ITALIC = 23 + RESET_UNDERLINE = 24 + RESET_BLINK = 25 + RESET_BLINK_FAST = 26 + RESET_REVERSE = 27 + RESET_HIDDEN = 28 + + FG_BLACK = 30 + FG_RED = 31 + FG_GREEN = 32 + FG_YELLOW = 33 + FG_BLUE = 34 + FG_MAGENTA = 35 + FG_CYAN = 36 + FG_WHITE = 37 + + FG_DEFAULT = 39 + + FG_LIGHT_BLACK = 90 + FG_LIGHT_RED = 91 + FG_LIGHT_GREEN = 92 + FG_LIGHT_YELLOW = 93 + FG_LIGHT_BLUE = 94 + FG_LIGHT_MAGENTA = 95 + FG_LIGHT_CYAN = 96 + FG_LIGHT_WHITE = 97 + + BG_BLACK = 40 + BG_RED = 41 + BG_GREEN = 42 + BG_YELLOW = 43 + BG_BLUE = 44 + BG_MAGENTA = 45 + BG_CYAN = 46 + BG_WHITE = 47 + + BG_DEFAULT = 49 + + BG_LIGHT_BLACK = 100 + BG_LIGHT_RED = 101 + BG_LIGHT_GREEN = 102 + BG_LIGHT_YELLOW = 103 + BG_LIGHT_BLUE = 104 + BG_LIGHT_MAGENTA = 105 + BG_LIGHT_CYAN = 106 + BG_LIGHT_WHITE = 107 + + +class AnsiState(object): + + def __init__(self): + self.default_attrs = None + + self.bold = False + self.bg_light = 
False + self.fg_light = False + + self.saved_pos = (0, 0) + + def do_text_action(self, attrs, action): + # In case the external state has changed, apply it to ours. + # Mostly the first time this is called. + if attrs & winapi.FOREGROUND_INTENSITY and not self.fg_light \ + and not self.bold: + self.fg_light = True + if attrs & winapi.BACKGROUND_INTENSITY and not self.bg_light: + self.bg_light = True + + dark_fg = { + TextAction.FG_BLACK: 0, + TextAction.FG_RED: winapi.FOREGROUND_RED, + TextAction.FG_GREEN: winapi.FOREGROUND_GREEN, + TextAction.FG_YELLOW: + winapi.FOREGROUND_GREEN | winapi.FOREGROUND_RED, + TextAction.FG_BLUE: winapi.FOREGROUND_BLUE, + TextAction.FG_MAGENTA: winapi.FOREGROUND_BLUE | + winapi.FOREGROUND_RED, + TextAction.FG_CYAN: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN, + TextAction.FG_WHITE: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN | + winapi.FOREGROUND_RED, + } + + dark_bg = { + TextAction.BG_BLACK: 0, + TextAction.BG_RED: winapi.BACKGROUND_RED, + TextAction.BG_GREEN: winapi.BACKGROUND_GREEN, + TextAction.BG_YELLOW: + winapi.BACKGROUND_GREEN | winapi.BACKGROUND_RED, + TextAction.BG_BLUE: winapi.BACKGROUND_BLUE, + TextAction.BG_MAGENTA: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_RED, + TextAction.BG_CYAN: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN, + TextAction.BG_WHITE: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN | + winapi.BACKGROUND_RED, + } + + light_fg = { + TextAction.FG_LIGHT_BLACK: 0, + TextAction.FG_LIGHT_RED: winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_GREEN: winapi.FOREGROUND_GREEN, + TextAction.FG_LIGHT_YELLOW: + winapi.FOREGROUND_GREEN | winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_BLUE: winapi.FOREGROUND_BLUE, + TextAction.FG_LIGHT_MAGENTA: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_CYAN: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN, + TextAction.FG_LIGHT_WHITE: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN | + winapi.FOREGROUND_RED, + } + + light_bg = { 
+ TextAction.BG_LIGHT_BLACK: 0, + TextAction.BG_LIGHT_RED: winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_GREEN: winapi.BACKGROUND_GREEN, + TextAction.BG_LIGHT_YELLOW: + winapi.BACKGROUND_GREEN | winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_BLUE: winapi.BACKGROUND_BLUE, + TextAction.BG_LIGHT_MAGENTA: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_CYAN: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN, + TextAction.BG_LIGHT_WHITE: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN | + winapi.BACKGROUND_RED, + } + + if action == TextAction.RESET_ALL: + attrs = self.default_attrs + self.bold = self.fg_light = self.bg_light = False + elif action == TextAction.SET_BOLD: + self.bold = True + elif action == TextAction.RESET_BOLD: + self.bold = False + elif action == TextAction.SET_DIM: + self.bold = False + elif action == TextAction.SET_REVERSE: + attrs |= winapi.COMMON_LVB_REVERSE_VIDEO + elif action == TextAction.RESET_REVERSE: + attrs &= ~winapi.COMMON_LVB_REVERSE_VIDEO + elif action == TextAction.SET_UNDERLINE: + attrs |= winapi.COMMON_LVB_UNDERSCORE + elif action == TextAction.RESET_UNDERLINE: + attrs &= ~winapi.COMMON_LVB_UNDERSCORE + elif action == TextAction.FG_DEFAULT: + attrs = (attrs & ~0xF) | (self.default_attrs & 0xF) + self.fg_light = False + elif action == TextAction.BG_DEFAULT: + attrs = (attrs & ~0xF0) | (self.default_attrs & 0xF0) + self.bg_light = False + elif action in dark_fg: + attrs = (attrs & ~0xF) | dark_fg[action] + self.fg_light = False + elif action in dark_bg: + attrs = (attrs & ~0xF0) | dark_bg[action] + self.bg_light = False + elif action in light_fg: + attrs = (attrs & ~0xF) | light_fg[action] + self.fg_light = True + elif action in light_bg: + attrs = (attrs & ~0xF0) | light_bg[action] + self.bg_light = True + + if self.fg_light or self.bold: + attrs |= winapi.FOREGROUND_INTENSITY + else: + attrs &= ~winapi.FOREGROUND_INTENSITY + + if self.bg_light: + attrs |= winapi.BACKGROUND_INTENSITY + else: + attrs &= 
~winapi.BACKGROUND_INTENSITY + + return attrs + + def apply(self, handle, code): + buffer_info = winapi.CONSOLE_SCREEN_BUFFER_INFO() + if not winapi.GetConsoleScreenBufferInfo(handle, + ctypes.byref(buffer_info)): + return + + attrs = buffer_info.wAttributes + + # We take the first attrs we see as default + if self.default_attrs is None: + self.default_attrs = attrs + # Make sure that like with linux terminals the program doesn't + # affect the prompt after it exits + atexit.register( + winapi.SetConsoleTextAttribute, handle, self.default_attrs) + + cmd, args = ansi_parse(code) + if cmd == AnsiCommand.TEXT: + for action in args: + attrs = self.do_text_action(attrs, action) + winapi.SetConsoleTextAttribute(handle, attrs) + elif cmd in (AnsiCommand.MOVE_UP, AnsiCommand.MOVE_DOWN, + AnsiCommand.MOVE_FORWARD, AnsiCommand.MOVE_BACKWARD): + + coord = buffer_info.dwCursorPosition + x, y = coord.X, coord.Y + + amount = max(args[0], 1) + + if cmd == AnsiCommand.MOVE_UP: + y -= amount + elif cmd == AnsiCommand.MOVE_DOWN: + y += amount + elif cmd == AnsiCommand.MOVE_FORWARD: + x += amount + elif cmd == AnsiCommand.MOVE_BACKWARD: + x -= amount + + x = max(x, 0) + y = max(y, 0) + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) + elif cmd in (AnsiCommand.SET_POS, AnsiCommand.SET_POS_ALT): + args = list(args) + while len(args) < 2: + args.append(0) + x, y = args[:2] + + win_rect = buffer_info.srWindow + x += win_rect.Left - 1 + y += win_rect.Top - 1 + + x = max(x, 0) + y = max(y, 0) + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) + elif cmd == AnsiCommand.SAVE_POS: + win_rect = buffer_info.srWindow + coord = buffer_info.dwCursorPosition + x, y = coord.X, coord.Y + x -= win_rect.Left + y -= win_rect.Top + self.saved_pos = (x, y) + elif cmd == AnsiCommand.RESTORE_POS: + win_rect = buffer_info.srWindow + x, y = self.saved_pos + x += win_rect.Left + y += win_rect.Top + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) diff --git 
a/libs/mutagen/_senf/_winapi.py b/libs/mutagen/_senf/_winapi.py new file mode 100644 index 00000000..5e0f7854 --- /dev/null +++ b/libs/mutagen/_senf/_winapi.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import ctypes +from ctypes import WinDLL, CDLL, wintypes + + +shell32 = WinDLL("shell32") +kernel32 = WinDLL("kernel32") +shlwapi = WinDLL("shlwapi") +msvcrt = CDLL("msvcrt") + +GetCommandLineW = kernel32.GetCommandLineW +GetCommandLineW.argtypes = [] +GetCommandLineW.restype = wintypes.LPCWSTR + +CommandLineToArgvW = shell32.CommandLineToArgvW +CommandLineToArgvW.argtypes = [ + wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)] +CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR) + +LocalFree = kernel32.LocalFree +LocalFree.argtypes = [wintypes.HLOCAL] +LocalFree.restype = wintypes.HLOCAL + +# https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751.aspx +LPCTSTR = ctypes.c_wchar_p +LPWSTR = wintypes.LPWSTR +LPCWSTR = ctypes.c_wchar_p +LPTSTR = LPWSTR +PCWSTR = ctypes.c_wchar_p +PCTSTR = PCWSTR +PWSTR = ctypes.c_wchar_p +PTSTR = PWSTR +LPVOID = wintypes.LPVOID +WCHAR = wintypes.WCHAR +LPSTR = ctypes.c_char_p + +BOOL = wintypes.BOOL +LPBOOL = ctypes.POINTER(BOOL) +UINT = wintypes.UINT +WORD = wintypes.WORD +DWORD = wintypes.DWORD +SHORT = wintypes.SHORT +HANDLE = wintypes.HANDLE +ULONG = wintypes.ULONG +LPCSTR = wintypes.LPCSTR + +STD_INPUT_HANDLE = DWORD(-10) +STD_OUTPUT_HANDLE = DWORD(-11) +STD_ERROR_HANDLE = DWORD(-12) + +INVALID_HANDLE_VALUE = wintypes.HANDLE(-1).value + +INTERNET_MAX_SCHEME_LENGTH = 32 +INTERNET_MAX_PATH_LENGTH = 2048 +INTERNET_MAX_URL_LENGTH = ( + INTERNET_MAX_SCHEME_LENGTH + len("://") + INTERNET_MAX_PATH_LENGTH) + +FOREGROUND_BLUE = 0x0001 +FOREGROUND_GREEN = 0x0002 +FOREGROUND_RED = 0x0004 +FOREGROUND_INTENSITY = 0x0008 + +BACKGROUND_BLUE = 0x0010 +BACKGROUND_GREEN = 0x0020 +BACKGROUND_RED = 0x0040 +BACKGROUND_INTENSITY = 0x0080 + +COMMON_LVB_REVERSE_VIDEO = 0x4000 +COMMON_LVB_UNDERSCORE = 0x8000 + +UrlCreateFromPathW = shlwapi.UrlCreateFromPathW +UrlCreateFromPathW.argtypes = [ + PCTSTR, PTSTR, ctypes.POINTER(DWORD), DWORD] +UrlCreateFromPathW.restype = ctypes.HRESULT + +SetEnvironmentVariableW = 
kernel32.SetEnvironmentVariableW +SetEnvironmentVariableW.argtypes = [LPCTSTR, LPCTSTR] +SetEnvironmentVariableW.restype = wintypes.BOOL + +GetEnvironmentVariableW = kernel32.GetEnvironmentVariableW +GetEnvironmentVariableW.argtypes = [LPCTSTR, LPTSTR, DWORD] +GetEnvironmentVariableW.restype = DWORD + +GetEnvironmentStringsW = kernel32.GetEnvironmentStringsW +GetEnvironmentStringsW.argtypes = [] +GetEnvironmentStringsW.restype = ctypes.c_void_p + +FreeEnvironmentStringsW = kernel32.FreeEnvironmentStringsW +FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p] +FreeEnvironmentStringsW.restype = ctypes.c_bool + +GetStdHandle = kernel32.GetStdHandle +GetStdHandle.argtypes = [DWORD] +GetStdHandle.restype = HANDLE + + +class COORD(ctypes.Structure): + + _fields_ = [ + ("X", SHORT), + ("Y", SHORT), + ] + + +class SMALL_RECT(ctypes.Structure): + + _fields_ = [ + ("Left", SHORT), + ("Top", SHORT), + ("Right", SHORT), + ("Bottom", SHORT), + ] + + +class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure): + + _fields_ = [ + ("dwSize", COORD), + ("dwCursorPosition", COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", COORD), + ] + + +GetConsoleScreenBufferInfo = kernel32.GetConsoleScreenBufferInfo +GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)] +GetConsoleScreenBufferInfo.restype = BOOL + +GetConsoleOutputCP = kernel32.GetConsoleOutputCP +GetConsoleOutputCP.argtypes = [] +GetConsoleOutputCP.restype = UINT + +SetConsoleOutputCP = kernel32.SetConsoleOutputCP +SetConsoleOutputCP.argtypes = [UINT] +SetConsoleOutputCP.restype = BOOL + +GetConsoleCP = kernel32.GetConsoleCP +GetConsoleCP.argtypes = [] +GetConsoleCP.restype = UINT + +SetConsoleCP = kernel32.SetConsoleCP +SetConsoleCP.argtypes = [UINT] +SetConsoleCP.restype = BOOL + +SetConsoleTextAttribute = kernel32.SetConsoleTextAttribute +SetConsoleTextAttribute.argtypes = [HANDLE, WORD] +SetConsoleTextAttribute.restype = BOOL + +SetConsoleCursorPosition = 
kernel32.SetConsoleCursorPosition +SetConsoleCursorPosition.argtypes = [HANDLE, COORD] +SetConsoleCursorPosition.restype = BOOL + +ReadConsoleW = kernel32.ReadConsoleW +ReadConsoleW.argtypes = [HANDLE, LPVOID, DWORD, ctypes.POINTER(DWORD), LPVOID] +ReadConsoleW.restype = BOOL + +MultiByteToWideChar = kernel32.MultiByteToWideChar +MultiByteToWideChar.argtypes = [ + UINT, DWORD, LPCSTR, ctypes.c_int, LPWSTR, ctypes.c_int] +MultiByteToWideChar.restype = ctypes.c_int + +WideCharToMultiByte = kernel32.WideCharToMultiByte +WideCharToMultiByte.argtypes = [ + UINT, DWORD, LPCWSTR, ctypes.c_int, LPSTR, ctypes.c_int, LPCSTR, LPBOOL] +WideCharToMultiByte.restype = ctypes.c_int + +MoveFileW = kernel32.MoveFileW +MoveFileW.argtypes = [LPCTSTR, LPCTSTR] +MoveFileW.restype = BOOL + +if hasattr(kernel32, "GetFileInformationByHandleEx"): + GetFileInformationByHandleEx = kernel32.GetFileInformationByHandleEx + GetFileInformationByHandleEx.argtypes = [ + HANDLE, ctypes.c_int, ctypes.c_void_p, DWORD] + GetFileInformationByHandleEx.restype = BOOL +else: + # Windows XP + GetFileInformationByHandleEx = None + +MAX_PATH = 260 +FileNameInfo = 2 + + +class FILE_NAME_INFO(ctypes.Structure): + _fields_ = [ + ("FileNameLength", DWORD), + ("FileName", WCHAR), + ] + + +_get_osfhandle = msvcrt._get_osfhandle +_get_osfhandle.argtypes = [ctypes.c_int] +_get_osfhandle.restype = HANDLE + +GetFileType = kernel32.GetFileType +GetFileType.argtypes = [HANDLE] +GetFileType.restype = DWORD + +FILE_TYPE_PIPE = 0x0003 diff --git a/libs/mutagen/_tags.py b/libs/mutagen/_tags.py index e6365f0a..c3f2ebf6 100644 --- a/libs/mutagen/_tags.py +++ b/libs/mutagen/_tags.py @@ -2,12 +2,17 @@ # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +from ._util import loadfile class PaddingInfo(object): - """Abstract padding information object. + """PaddingInfo() + + Abstract padding information object. This will be passed to the callback function that can be used for saving tags. @@ -24,16 +29,14 @@ class PaddingInfo(object): The default implementation can be accessed using the :meth:`get_default_padding` method in the callback. - """ - padding = 0 - """The amount of padding left after saving in bytes (can be negative if - more data needs to be added as padding is available) + Attributes: + padding (`int`): The amount of padding left after saving in bytes + (can be negative if more data needs to be added as padding is + available) + size (`int`): The amount of data following the padding """ - size = 0 - """The amount of data following the padding""" - def __init__(self, padding, size): self.padding = padding self.size = size @@ -42,8 +45,8 @@ class PaddingInfo(object): """The default implementation which tries to select a reasonable amount of padding and which might change in future versions. - :return: Amount of padding after saving - :rtype: int + Returns: + int: Amount of padding after saving """ high = 1024 * 10 + self.size // 100 # 10 KiB + 1% of trailing data @@ -81,15 +84,21 @@ class Tags(object): def pprint(self): """ - :returns: tag information - :rtype: mutagen.text + Returns: + text: tag information """ raise NotImplementedError class Metadata(Tags): - """Like :class:`Tags` but for standalone tagging formats that are not + """Metadata(filething=None, **kwargs) + + Args: + filething (filething): a filename or a file-like object or `None` + to create an empty instance (like ``ID3()``) + + Like :class:`Tags` but for standalone tagging formats that are not solely managed by a container format. 
Provides methods to load, save and delete tags. @@ -101,24 +110,37 @@ class Metadata(Tags): if args or kwargs: self.load(*args, **kwargs) - def load(self, filename, **kwargs): + @loadfile() + def load(self, filething, **kwargs): raise NotImplementedError - def save(self, filename=None): - """Save changes to a file. + @loadfile(writable=False) + def save(self, filething, **kwargs): + """save(filething=None, **kwargs) - :raises mutagen.MutagenError: if saving wasn't possible + Save changes to a file. + + Args: + filething (filething): or `None` + Raises: + MutagenError: if saving wasn't possible """ raise NotImplementedError - def delete(self, filename=None): - """Remove tags from a file. + @loadfile(writable=False) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. In most cases this means any traces of the tag will be removed from the file. - :raises mutagen.MutagenError: if deleting wasn't possible + Args: + filething (filething): or `None` + Raises: + MutagenError: if deleting wasn't possible """ raise NotImplementedError diff --git a/libs/mutagen/_tools/__init__.py b/libs/mutagen/_tools/__init__.py new file mode 100644 index 00000000..3e6b1556 --- /dev/null +++ b/libs/mutagen/_tools/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
diff --git a/libs/mutagen/_tools/_util.py b/libs/mutagen/_tools/_util.py new file mode 100644 index 00000000..4e050769 --- /dev/null +++ b/libs/mutagen/_tools/_util.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import os +import signal +import contextlib +import optparse + +from mutagen._senf import print_ +from mutagen._compat import text_type, iterbytes + + +def split_escape(string, sep, maxsplit=None, escape_char="\\"): + """Like unicode/str/bytes.split but allows for the separator to be escaped + + If passed unicode/str/bytes will only return list of unicode/str/bytes. + """ + + assert len(sep) == 1 + assert len(escape_char) == 1 + + if isinstance(string, bytes): + if isinstance(escape_char, text_type): + escape_char = escape_char.encode("ascii") + iter_ = iterbytes + else: + iter_ = iter + + if maxsplit is None: + maxsplit = len(string) + + empty = string[:0] + result = [] + current = empty + escaped = False + for char in iter_(string): + if escaped: + if char != escape_char and char != sep: + current += escape_char + current += char + escaped = False + else: + if char == escape_char: + escaped = True + elif char == sep and len(result) < maxsplit: + result.append(current) + current = empty + else: + current += char + result.append(current) + return result + + +class SignalHandler(object): + + def __init__(self): + self._interrupted = False + self._nosig = False + self._init = False + + def init(self): + signal.signal(signal.SIGINT, self._handler) + signal.signal(signal.SIGTERM, self._handler) + if os.name != "nt": + signal.signal(signal.SIGHUP, self._handler) + + def _handler(self, signum, frame): + self._interrupted = True + if not self._nosig: + raise 
SystemExit("Aborted...") + + @contextlib.contextmanager + def block(self): + """While this context manager is active any signals for aborting + the process will be queued and exit the program once the context + is left. + """ + + self._nosig = True + yield + self._nosig = False + if self._interrupted: + raise SystemExit("Aborted...") + + +class OptionParser(optparse.OptionParser): + """OptionParser subclass which supports printing Unicode under Windows""" + + def print_help(self, file=None): + print_(self.format_help(), file=file) diff --git a/libs/mutagen/_tools/mid3cp.py b/libs/mutagen/_tools/mid3cp.py new file mode 100644 index 00000000..1339548d --- /dev/null +++ b/libs/mutagen/_tools/mid3cp.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 Marcus Sundman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""A program replicating the functionality of id3lib's id3cp, using mutagen for +tag loading and saving. +""" + +import sys +import os.path + +import mutagen +import mutagen.id3 +from mutagen._senf import print_, argv +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +VERSION = (0, 1) +_sig = SignalHandler() + + +def printerr(*args, **kwargs): + kwargs.setdefault("file", sys.stderr) + print_(*args, **kwargs) + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = mutagen.version_string + my_version = ".".join(map(str, VERSION)) + version = "mid3cp %s\nUses Mutagen %s" % (my_version, mutagen_version) + self.disable_interspersed_args() + OptionParser.__init__( + self, version=version, + usage="%prog [option(s)] <src> <dst>", + description=("Copies ID3 tags from <src> to <dst>. 
Mutagen-based " + "replacement for id3lib's id3cp.")) + + +def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): + """Returns 0 on success""" + + if excluded_tags is None: + excluded_tags = [] + + try: + id3 = mutagen.id3.ID3(src, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found in ", src, file=sys.stderr) + return 1 + except Exception as err: + print_(str(err), file=sys.stderr) + return 1 + + if verbose: + print_(u"File", src, u"contains:", file=sys.stderr) + print_(id3.pprint(), file=sys.stderr) + + for tag in excluded_tags: + id3.delall(tag) + + if merge: + try: + target = mutagen.id3.ID3(dst, translate=False) + except mutagen.id3.ID3NoHeaderError: + # no need to merge + pass + except Exception as err: + print_(str(err), file=sys.stderr) + return 1 + else: + for frame in id3.values(): + target.add(frame) + + id3 = target + + # if the source is 2.3 save it as 2.3 + if id3.version < (2, 4, 0): + id3.update_to_v23() + v2_version = 3 + else: + id3.update_to_v24() + v2_version = 4 + + try: + id3.save(dst, v1=(2 if write_v1 else 0), v2_version=v2_version) + except Exception as err: + print_(u"Error saving", dst, u":\n%s" % text_type(err), + file=sys.stderr) + return 1 + else: + if verbose: + print_(u"Successfully saved", dst, file=sys.stderr) + return 0 + + +def main(argv): + parser = ID3OptionParser() + parser.add_option("-v", "--verbose", action="store_true", dest="verbose", + help="print out saved tags", default=False) + parser.add_option("--write-v1", action="store_true", dest="write_v1", + default=False, help="write id3v1 tags") + parser.add_option("-x", "--exclude-tag", metavar="TAG", action="append", + dest="x", help="exclude the specified tag", default=[]) + parser.add_option("--merge", action="store_true", + help="Copy over frames instead of the whole ID3 tag", + default=False) + (options, args) = parser.parse_args(argv[1:]) + + if len(args) != 2: + parser.print_help(file=sys.stderr) + return 1 + + 
(src, dst) = args + + if not os.path.isfile(src): + print_(u"File not found:", src, file=sys.stderr) + parser.print_help(file=sys.stderr) + return 1 + + if not os.path.isfile(dst): + printerr(u"File not found:", dst, file=sys.stderr) + parser.print_help(file=sys.stderr) + return 1 + + # Strip tags - "-x FOO" adds whitespace at the beginning of the tag name + excluded_tags = [x.strip() for x in options.x] + + with _sig.block(): + return copy(src, dst, options.merge, options.write_v1, excluded_tags, + options.verbose) + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/libs/mutagen/_tools/mid3iconv.py b/libs/mutagen/_tools/mid3iconv.py new file mode 100644 index 00000000..554f6bb8 --- /dev/null +++ b/libs/mutagen/_tools/mid3iconv.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# Copyright 2006 Emfox Zhou <EmfoxZhou@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +""" +ID3iconv is a Java based ID3 encoding convertor, here's the Python version. +""" + +import sys +import locale + +import mutagen +import mutagen.id3 +from mutagen._senf import argv, print_, fsnative +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +VERSION = (0, 3) +_sig = SignalHandler() + + +def getpreferredencoding(): + return locale.getpreferredencoding() or "utf-8" + + +def isascii(string): + """Checks whether a unicode string is non-empty and contains only ASCII + characters. 
+ """ + if not string: + return False + + try: + string.encode('ascii') + except UnicodeEncodeError: + return False + + return True + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = ".".join(map(str, mutagen.version)) + my_version = ".".join(map(str, VERSION)) + version = "mid3iconv %s\nUses Mutagen %s" % ( + my_version, mutagen_version) + return OptionParser.__init__( + self, version=version, + usage="%prog [OPTION] [FILE]...", + description=("Mutagen-based replacement the id3iconv utility, " + "which converts ID3 tags from legacy encodings " + "to Unicode and stores them using the ID3v2 format.")) + + def format_help(self, *args, **kwargs): + text = OptionParser.format_help(self, *args, **kwargs) + return text + "\nFiles are updated in-place, so use --dry-run first.\n" + + +def update(options, filenames): + encoding = options.encoding or getpreferredencoding() + verbose = options.verbose + noupdate = options.noupdate + force_v1 = options.force_v1 + remove_v1 = options.remove_v1 + + def conv(uni): + return uni.encode('iso-8859-1').decode(encoding) + + for filename in filenames: + with _sig.block(): + if verbose != "quiet": + print_(u"Updating", filename) + + if has_id3v1(filename) and not noupdate and force_v1: + mutagen.id3.delete(filename, False, True) + + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose != "quiet": + print_(u"No ID3 header found; skipping...") + continue + except Exception as err: + print_(text_type(err), file=sys.stderr) + continue + + for tag in filter(lambda t: t.startswith(("T", "COMM")), id3): + frame = id3[tag] + if isinstance(frame, mutagen.id3.TimeStampTextFrame): + # non-unicode fields + continue + try: + text = frame.text + except AttributeError: + continue + try: + text = [conv(x) for x in frame.text] + except (UnicodeError, LookupError): + continue + else: + frame.text = text + if not text or min(map(isascii, text)): + frame.encoding = 3 + else: + 
frame.encoding = 1 + + if verbose == "debug": + print_(id3.pprint()) + + if not noupdate: + if remove_v1: + id3.save(filename, v1=False) + else: + id3.save(filename) + + +def has_id3v1(filename): + try: + with open(filename, 'rb') as f: + f.seek(-128, 2) + return f.read(3) == b"TAG" + except IOError: + return False + + +def main(argv): + parser = ID3OptionParser() + parser.add_option( + "-e", "--encoding", metavar="ENCODING", action="store", + type="string", dest="encoding", + help=("Specify original tag encoding (default is %s)" % ( + getpreferredencoding()))) + parser.add_option( + "-p", "--dry-run", action="store_true", dest="noupdate", + help="Do not actually modify files") + parser.add_option( + "--force-v1", action="store_true", dest="force_v1", + help="Use an ID3v1 tag even if an ID3v2 tag is present") + parser.add_option( + "--remove-v1", action="store_true", dest="remove_v1", + help="Remove v1 tag after processing the files") + parser.add_option( + "-q", "--quiet", action="store_const", dest="verbose", + const="quiet", help="Only output errors") + parser.add_option( + "-d", "--debug", action="store_const", dest="verbose", + const="debug", help="Output updated tags") + + for i, arg in enumerate(argv): + if arg == "-v1": + argv[i] = fsnative(u"--force-v1") + elif arg == "-removev1": + argv[i] = fsnative(u"--remove-v1") + + (options, args) = parser.parse_args(argv[1:]) + + if args: + update(options, args) + else: + parser.print_help() + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/libs/mutagen/_tools/mid3v2.py b/libs/mutagen/_tools/mid3v2.py new file mode 100644 index 00000000..2a79e3b8 --- /dev/null +++ b/libs/mutagen/_tools/mid3v2.py @@ -0,0 +1,490 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your 
option) any later version. + +"""Pretend to be /usr/bin/id3v2 from id3lib, sort of.""" + +import sys +import codecs +import mimetypes + +from optparse import SUPPRESS_HELP + +import mutagen +import mutagen.id3 +from mutagen.id3 import Encoding, PictureType +from mutagen._senf import fsnative, print_, argv, fsn2text, fsn2bytes, \ + bytes2fsn +from mutagen._compat import PY2, text_type + +from ._util import split_escape, SignalHandler, OptionParser + + +VERSION = (1, 3) +_sig = SignalHandler() + +global verbose +verbose = True + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = ".".join(map(str, mutagen.version)) + my_version = ".".join(map(str, VERSION)) + version = "mid3v2 %s\nUses Mutagen %s" % (my_version, mutagen_version) + self.edits = [] + OptionParser.__init__( + self, version=version, + usage="%prog [OPTION] [FILE]...", + description="Mutagen-based replacement for id3lib's id3v2.") + + def format_help(self, *args, **kwargs): + text = OptionParser.format_help(self, *args, **kwargs) + return text + """\ +You can set the value for any ID3v2 frame by using '--' and then a frame ID. +For example: + mid3v2 --TIT3 "Monkey!" file.mp3 +would set the "Subtitle/Description" frame to "Monkey!". + +Any editing operation will cause the ID3 tag to be upgraded to ID3v2.4. 
+""" + + +def list_frames(option, opt, value, parser): + items = mutagen.id3.Frames.items() + for name, frame in sorted(items): + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + raise SystemExit + + +def list_frames_2_2(option, opt, value, parser): + items = mutagen.id3.Frames_2_2.items() + items = sorted(items) + for name, frame in items: + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + raise SystemExit + + +def list_genres(option, opt, value, parser): + for i, genre in enumerate(mutagen.id3.TCON.GENRES): + print_(u"%3d: %s" % (i, genre)) + raise SystemExit + + +def delete_tags(filenames, v1, v2): + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"deleting ID3 tag info in", filename, file=sys.stderr) + mutagen.id3.delete(filename, v1, v2) + + +def delete_frames(deletes, filenames): + + try: + deletes = frame_from_fsnative(deletes) + except ValueError as err: + print_(text_type(err), file=sys.stderr) + + frames = deletes.split(",") + + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"deleting %s from" % deletes, filename, + file=sys.stderr) + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose: + print_(u"No ID3 header found; skipping.", file=sys.stderr) + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + for frame in frames: + id3.delall(frame) + id3.save() + + +def frame_from_fsnative(arg): + """Takes item from argv and returns ascii native str + or raises ValueError. + """ + + assert isinstance(arg, fsnative) + + text = fsn2text(arg, strict=True) + if PY2: + return text.encode("ascii") + else: + return text.encode("ascii").decode("ascii") + + +def value_from_fsnative(arg, escape): + """Takes an item from argv and returns a text_type value without + surrogate escapes or raises ValueError. 
+ """ + + assert isinstance(arg, fsnative) + + if escape: + bytes_ = fsn2bytes(arg) + if PY2: + bytes_ = bytes_.decode("string_escape") + else: + bytes_ = codecs.escape_decode(bytes_)[0] + arg = bytes2fsn(bytes_) + + text = fsn2text(arg, strict=True) + return text + + +def error(*args): + print_(*args, file=sys.stderr) + raise SystemExit(1) + + +def get_frame_encoding(frame_id, value): + if frame_id == "APIC": + # See https://github.com/beetbox/beets/issues/899#issuecomment-62437773 + return Encoding.UTF16 if value else Encoding.LATIN1 + else: + return Encoding.UTF8 + + +def write_files(edits, filenames, escape): + # unescape escape sequences and decode values + encoded_edits = [] + for frame, value in edits: + if not value: + continue + + try: + frame = frame_from_fsnative(frame) + except ValueError as err: + print_(text_type(err), file=sys.stderr) + + assert isinstance(frame, str) + + # strip "--" + frame = frame[2:] + + try: + value = value_from_fsnative(value, escape) + except ValueError as err: + error(u"%s: %s" % (frame, text_type(err))) + + assert isinstance(value, text_type) + + encoded_edits.append((frame, value)) + edits = encoded_edits + + # preprocess: + # for all [frame,value] pairs in the edits list + # gather values for identical frames into a list + tmp = {} + for frame, value in edits: + if frame in tmp: + tmp[frame].append(value) + else: + tmp[frame] = [value] + # edits is now a dictionary of frame -> [list of values] + edits = tmp + + # escape also enables escaping of the split separator + if escape: + string_split = split_escape + else: + string_split = lambda s, *args, **kwargs: s.split(*args, **kwargs) + + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"Writing", filename, file=sys.stderr) + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose: + print_(u"No ID3 header found; creating a new tag", + file=sys.stderr) + id3 = mutagen.id3.ID3() + except Exception as err: + 
print_(str(err), file=sys.stderr) + continue + for (frame, vlist) in edits.items(): + if frame == "POPM": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + email, rating, count = values[0], 0, 0 + elif len(values) == 2: + email, rating, count = values[0], values[1], 0 + else: + email, rating, count = values + + frame = mutagen.id3.POPM( + email=email, rating=int(rating), count=int(count)) + id3.add(frame) + elif frame == "APIC": + for value in vlist: + values = string_split(value, ":") + # FIXME: doesn't support filenames with an invalid + # encoding since we have already decoded at that point + fn = values[0] + + if len(values) >= 2: + desc = values[1] + else: + desc = u"cover" + + if len(values) >= 3: + try: + picture_type = int(values[2]) + except ValueError: + error(u"Invalid picture type: %r" % values[1]) + else: + picture_type = PictureType.COVER_FRONT + + if len(values) >= 4: + mime = values[3] + else: + mime = mimetypes.guess_type(fn)[0] or "image/jpeg" + + if len(values) >= 5: + error("APIC: Invalid format") + + encoding = get_frame_encoding(frame, desc) + + try: + with open(fn, "rb") as h: + data = h.read() + except IOError as e: + error(text_type(e)) + + frame = mutagen.id3.APIC(encoding=encoding, mime=mime, + desc=desc, type=picture_type, data=data) + + id3.add(frame) + elif frame == "COMM": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + value, desc, lang = values[0], "", "eng" + elif len(values) == 2: + desc, value, lang = values[0], values[1], "eng" + else: + value = ":".join(values[1:-1]) + desc, lang = values[0], values[-1] + frame = mutagen.id3.COMM( + encoding=3, text=value, lang=lang, desc=desc) + id3.add(frame) + elif frame == "USLT": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + value, desc, lang = values[0], "", "eng" + elif len(values) == 2: + desc, value, lang = values[0], values[1], "eng" + else: + value = ":".join(values[1:-1]) + desc, 
lang = values[0], values[-1] + frame = mutagen.id3.USLT( + encoding=3, text=value, lang=lang, desc=desc) + id3.add(frame) + elif frame == "UFID": + for value in vlist: + values = string_split(value, ":") + if len(values) != 2: + error(u"Invalid value: %r" % values) + owner = values[0] + data = values[1].encode("utf-8") + frame = mutagen.id3.UFID(owner=owner, data=data) + id3.add(frame) + elif frame == "TXXX": + for value in vlist: + values = string_split(value, ":", 1) + if len(values) == 1: + desc, value = "", values[0] + else: + desc, value = values[0], values[1] + frame = mutagen.id3.TXXX( + encoding=3, text=value, desc=desc) + id3.add(frame) + elif frame == "WXXX": + for value in vlist: + values = string_split(value, ":", 1) + if len(values) == 1: + desc, value = "", values[0] + else: + desc, value = values[0], values[1] + frame = mutagen.id3.WXXX( + encoding=3, url=value, desc=desc) + id3.add(frame) + elif issubclass(mutagen.id3.Frames[frame], + mutagen.id3.UrlFrame): + frame = mutagen.id3.Frames[frame]( + encoding=3, url=vlist[-1]) + id3.add(frame) + else: + frame = mutagen.id3.Frames[frame](encoding=3, text=vlist) + id3.add(frame) + id3.save(filename) + + +def list_tags(filenames): + for filename in filenames: + print_("IDv2 tag info for", filename) + try: + id3 = mutagen.id3.ID3(filename, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found; skipping.") + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + print_(id3.pprint()) + + +def list_tags_raw(filenames): + for filename in filenames: + print_("Raw IDv2 tag info for", filename) + try: + id3 = mutagen.id3.ID3(filename, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found; skipping.") + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + for frame in id3.values(): + print_(text_type(repr(frame))) + + +def main(argv): + parser = ID3OptionParser() 
+ parser.add_option( + "-v", "--verbose", action="store_true", dest="verbose", default=False, + help="be verbose") + parser.add_option( + "-q", "--quiet", action="store_false", dest="verbose", + help="be quiet (the default)") + parser.add_option( + "-e", "--escape", action="store_true", default=False, + help="enable interpretation of backslash escapes") + parser.add_option( + "-f", "--list-frames", action="callback", callback=list_frames, + help="Display all possible frames for ID3v2.3 / ID3v2.4") + parser.add_option( + "--list-frames-v2.2", action="callback", callback=list_frames_2_2, + help="Display all possible frames for ID3v2.2") + parser.add_option( + "-L", "--list-genres", action="callback", callback=list_genres, + help="Lists all ID3v1 genres") + parser.add_option( + "-l", "--list", action="store_const", dest="action", const="list", + help="Lists the tag(s) on the open(s)") + parser.add_option( + "--list-raw", action="store_const", dest="action", const="list-raw", + help="Lists the tag(s) on the open(s) in Python format") + parser.add_option( + "-d", "--delete-v2", action="store_const", dest="action", + const="delete-v2", help="Deletes ID3v2 tags") + parser.add_option( + "-s", "--delete-v1", action="store_const", dest="action", + const="delete-v1", help="Deletes ID3v1 tags") + parser.add_option( + "-D", "--delete-all", action="store_const", dest="action", + const="delete-v1-v2", help="Deletes ID3v1 and ID3v2 tags") + parser.add_option( + '--delete-frames', metavar='FID1,FID2,...', action='store', + dest='deletes', default='', help="Delete the given frames") + parser.add_option( + "-C", "--convert", action="store_const", dest="action", + const="convert", + help="Convert tags to ID3v2.4 (any editing will do this)") + + parser.add_option( + "-a", "--artist", metavar='"ARTIST"', action="callback", + help="Set the artist information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TPE1"), + args[2]))) + parser.add_option( + "-A", 
"--album", metavar='"ALBUM"', action="callback", + help="Set the album title information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TALB"), + args[2]))) + parser.add_option( + "-t", "--song", metavar='"SONG"', action="callback", + help="Set the song title information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TIT2"), + args[2]))) + parser.add_option( + "-c", "--comment", metavar='"DESCRIPTION":"COMMENT":"LANGUAGE"', + action="callback", help="Set the comment information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--COMM"), + args[2]))) + parser.add_option( + "-p", "--picture", + metavar='"FILENAME":"DESCRIPTION":"IMAGE-TYPE":"MIME-TYPE"', + action="callback", help="Set the picture", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--APIC"), + args[2]))) + parser.add_option( + "-g", "--genre", metavar='"GENRE"', action="callback", + help="Set the genre or genre number", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TCON"), + args[2]))) + parser.add_option( + "-y", "--year", "--date", metavar='YYYY[-MM-DD]', action="callback", + help="Set the year/date", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TDRC"), + args[2]))) + parser.add_option( + "-T", "--track", metavar='"num/num"', action="callback", + help="Set the track number/(optional) total tracks", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TRCK"), + args[2]))) + + for key, frame in mutagen.id3.Frames.items(): + if (issubclass(frame, mutagen.id3.TextFrame) + or issubclass(frame, mutagen.id3.UrlFrame) + or issubclass(frame, mutagen.id3.POPM) + or frame in (mutagen.id3.APIC, mutagen.id3.UFID, + mutagen.id3.USLT)): + parser.add_option( + "--" + key, action="callback", help=SUPPRESS_HELP, + type='string', metavar="value", # optparse blows up with this + callback=lambda *args: args[3].edits.append(args[1:3])) 
+ + (options, args) = parser.parse_args(argv[1:]) + global verbose + verbose = options.verbose + + if args: + if parser.edits or options.deletes: + if options.deletes: + delete_frames(options.deletes, args) + if parser.edits: + write_files(parser.edits, args, options.escape) + elif options.action in [None, 'list']: + list_tags(args) + elif options.action == "list-raw": + list_tags_raw(args) + elif options.action == "convert": + write_files([], args, options.escape) + elif options.action.startswith("delete"): + delete_tags(args, "v1" in options.action, "v2" in options.action) + else: + parser.print_help() + else: + parser.print_help() + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/libs/mutagen/_tools/moggsplit.py b/libs/mutagen/_tools/moggsplit.py new file mode 100644 index 00000000..710f0dfe --- /dev/null +++ b/libs/mutagen/_tools/moggsplit.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""Split a multiplex/chained Ogg file into its component parts.""" + +import os + +import mutagen.ogg +from mutagen._senf import argv + +from ._util import SignalHandler, OptionParser + + +_sig = SignalHandler() + + +def main(argv): + from mutagen.ogg import OggPage + parser = OptionParser( + usage="%prog [options] filename.ogg ...", + description="Split Ogg logical streams using Mutagen.", + version="Mutagen %s" % ".".join(map(str, mutagen.version)) + ) + + parser.add_option( + "--extension", dest="extension", default="ogg", metavar='ext', + help="use this extension (default 'ogg')") + parser.add_option( + "--pattern", dest="pattern", default="%(base)s-%(stream)d.%(ext)s", + metavar='pattern', help="name files using this pattern") + parser.add_option( + "--m3u", dest="m3u", action="store_true", default=False, + help="generate an m3u (playlist) file") + + (options, args) = parser.parse_args(argv[1:]) + if not args: + raise SystemExit(parser.print_help() or 1) + + format = {'ext': options.extension} + for filename in args: + with _sig.block(): + fileobjs = {} + format["base"] = os.path.splitext(os.path.basename(filename))[0] + with open(filename, "rb") as fileobj: + if options.m3u: + m3u = open(format["base"] + ".m3u", "w") + fileobjs["m3u"] = m3u + else: + m3u = None + while True: + try: + page = OggPage(fileobj) + except EOFError: + break + else: + format["stream"] = page.serial + if page.serial not in fileobjs: + new_filename = options.pattern % format + new_fileobj = open(new_filename, "wb") + fileobjs[page.serial] = new_fileobj + if m3u: + m3u.write(new_filename + "\r\n") + fileobjs[page.serial].write(page.write()) + for f in fileobjs.values(): + f.close() + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/libs/mutagen/_tools/mutagen_inspect.py b/libs/mutagen/_tools/mutagen_inspect.py new file mode 100644 index 00000000..6bd6c614 --- /dev/null +++ b/libs/mutagen/_tools/mutagen_inspect.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# 
Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Full tag list for any given file.""" + +from mutagen._senf import print_, argv +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +_sig = SignalHandler() + + +def main(argv): + from mutagen import File + + parser = OptionParser() + parser.add_option("--no-flac", help="Compatibility; does nothing.") + parser.add_option("--no-mp3", help="Compatibility; does nothing.") + parser.add_option("--no-apev2", help="Compatibility; does nothing.") + + (options, args) = parser.parse_args(argv[1:]) + if not args: + raise SystemExit(parser.print_help() or 1) + + for filename in args: + print_(u"--", filename) + try: + print_(u"-", File(filename).pprint()) + except AttributeError: + print_(u"- Unknown file type") + except Exception as err: + print_(text_type(err)) + print_(u"") + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/libs/mutagen/_tools/mutagen_pony.py b/libs/mutagen/_tools/mutagen_pony.py new file mode 100644 index 00000000..e4a496c7 --- /dev/null +++ b/libs/mutagen/_tools/mutagen_pony.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Joe Wreschnig, Michael Urman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +import os +import sys +import traceback + +from mutagen._senf import print_, argv + +from ._util import SignalHandler + + +class Report(object): + def __init__(self, pathname): + self.name = pathname + self.files = 0 + self.unsync = 0 + self.missings = 0 + self.errors = [] + self.exceptions = {} + self.versions = {} + + def missing(self, filename): + self.missings += 1 + self.files += 1 + + def error(self, filename): + Ex, value, trace = sys.exc_info() + self.exceptions.setdefault(Ex, 0) + self.exceptions[Ex] += 1 + self.errors.append((filename, Ex, value, trace)) + self.files += 1 + + def success(self, id3): + self.versions.setdefault(id3.version, 0) + self.versions[id3.version] += 1 + self.files += 1 + if id3.f_unsynch: + self.unsync += 1 + + def __str__(self): + strings = ["-- Report for %s --" % self.name] + if self.files == 0: + return strings[0] + "\n" + "No MP3 files found.\n" + + good = self.files - len(self.errors) + strings.append("Loaded %d/%d files (%d%%)" % ( + good, self.files, (float(good) / self.files) * 100)) + strings.append("%d files with unsynchronized frames." % self.unsync) + strings.append("%d files without tags." 
% self.missings) + + strings.append("\nID3 Versions:") + items = list(self.versions.items()) + items.sort() + for v, i in items: + strings.append(" %s\t%d" % (".".join(map(str, v)), i)) + + if self.exceptions: + strings.append("\nExceptions:") + items = list(self.exceptions.items()) + items.sort() + for Ex, i in items: + strings.append(" %-20s\t%d" % (Ex.__name__, i)) + + if self.errors: + strings.append("\nERRORS:\n") + for filename, Ex, value, trace in self.errors: + strings.append("\nReading %s:" % filename) + strings.append( + "".join(traceback.format_exception(Ex, value, trace)[1:])) + else: + strings.append("\nNo errors!") + + return("\n".join(strings)) + + +def check_dir(path): + from mutagen.mp3 import MP3 + + rep = Report(path) + print_(u"Scanning", path) + for path, dirs, files in os.walk(path): + files.sort() + for fn in files: + if not fn.lower().endswith('.mp3'): + continue + ffn = os.path.join(path, fn) + try: + mp3 = MP3(ffn) + except Exception: + rep.error(ffn) + else: + if mp3.tags is None: + rep.missing(ffn) + else: + rep.success(mp3.tags) + + print_(str(rep)) + + +def main(argv): + if len(argv) == 1: + print_(u"Usage:", argv[0], u"directory ...") + else: + for path in argv[1:]: + check_dir(path) + + +def entry_point(): + SignalHandler().init() + return main(argv) diff --git a/libs/mutagen/_toolsutil.py b/libs/mutagen/_toolsutil.py deleted file mode 100644 index e9074b71..00000000 --- a/libs/mutagen/_toolsutil.py +++ /dev/null @@ -1,231 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
- -import os -import sys -import signal -import locale -import contextlib -import optparse -import ctypes - -from ._compat import text_type, PY2, PY3, iterbytes - - -def split_escape(string, sep, maxsplit=None, escape_char="\\"): - """Like unicode/str/bytes.split but allows for the separator to be escaped - - If passed unicode/str/bytes will only return list of unicode/str/bytes. - """ - - assert len(sep) == 1 - assert len(escape_char) == 1 - - if isinstance(string, bytes): - if isinstance(escape_char, text_type): - escape_char = escape_char.encode("ascii") - iter_ = iterbytes - else: - iter_ = iter - - if maxsplit is None: - maxsplit = len(string) - - empty = string[:0] - result = [] - current = empty - escaped = False - for char in iter_(string): - if escaped: - if char != escape_char and char != sep: - current += escape_char - current += char - escaped = False - else: - if char == escape_char: - escaped = True - elif char == sep and len(result) < maxsplit: - result.append(current) - current = empty - else: - current += char - result.append(current) - return result - - -class SignalHandler(object): - - def __init__(self): - self._interrupted = False - self._nosig = False - self._init = False - - def init(self): - signal.signal(signal.SIGINT, self._handler) - signal.signal(signal.SIGTERM, self._handler) - if os.name != "nt": - signal.signal(signal.SIGHUP, self._handler) - - def _handler(self, signum, frame): - self._interrupted = True - if not self._nosig: - raise SystemExit("Aborted...") - - @contextlib.contextmanager - def block(self): - """While this context manager is active any signals for aborting - the process will be queued and exit the program once the context - is left. 
- """ - - self._nosig = True - yield - self._nosig = False - if self._interrupted: - raise SystemExit("Aborted...") - - -def get_win32_unicode_argv(): - """Returns a unicode argv under Windows and standard sys.argv otherwise""" - - if os.name != "nt" or not PY2: - return sys.argv - - import ctypes - from ctypes import cdll, windll, wintypes - - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = wintypes.LPCWSTR - - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [ - wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)] - CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR) - - LocalFree = windll.kernel32.LocalFree - LocalFree.argtypes = [wintypes.HLOCAL] - LocalFree.restype = wintypes.HLOCAL - - argc = ctypes.c_int() - argv = CommandLineToArgvW(GetCommandLineW(), ctypes.byref(argc)) - if not argv: - return - - res = argv[max(0, argc.value - len(sys.argv)):argc.value] - - LocalFree(argv) - - return res - - -def fsencoding(): - """The encoding used for paths, argv, environ, stdout and stdin""" - - if os.name == "nt": - return "" - - return locale.getpreferredencoding() or "utf-8" - - -def fsnative(text=u""): - """Returns the passed text converted to the preferred path type - for each platform. - """ - - assert isinstance(text, text_type) - - if os.name == "nt" or PY3: - return text - else: - return text.encode(fsencoding(), "replace") - return text - - -def is_fsnative(arg): - """If the passed value is of the preferred path type for each platform. - Note that on Python3+linux, paths can be bytes or str but this returns - False for bytes there. - """ - - if PY3 or os.name == "nt": - return isinstance(arg, text_type) - else: - return isinstance(arg, bytes) - - -def print_(*objects, **kwargs): - """A print which supports bytes and str+surrogates under python3. - - Needed so we can print anything passed to us through argv and environ. 
- Under Windows only text_type is allowed. - - Arguments: - objects: one or more bytes/text - linesep (bool): whether a line separator should be appended - sep (bool): whether objects should be printed separated by spaces - """ - - linesep = kwargs.pop("linesep", True) - sep = kwargs.pop("sep", True) - file_ = kwargs.pop("file", None) - if file_ is None: - file_ = sys.stdout - - old_cp = None - if os.name == "nt": - # Try to force the output to cp65001 aka utf-8. - # If that fails use the current one (most likely cp850, so - # most of unicode will be replaced with '?') - encoding = "utf-8" - old_cp = ctypes.windll.kernel32.GetConsoleOutputCP() - if ctypes.windll.kernel32.SetConsoleOutputCP(65001) == 0: - encoding = getattr(sys.stdout, "encoding", None) or "utf-8" - old_cp = None - else: - encoding = fsencoding() - - try: - if linesep: - objects = list(objects) + [os.linesep] - - parts = [] - for text in objects: - if isinstance(text, text_type): - if PY3: - try: - text = text.encode(encoding, 'surrogateescape') - except UnicodeEncodeError: - text = text.encode(encoding, 'replace') - else: - text = text.encode(encoding, 'replace') - parts.append(text) - - data = (b" " if sep else b"").join(parts) - try: - fileno = file_.fileno() - except (AttributeError, OSError, ValueError): - # for tests when stdout is replaced - try: - file_.write(data) - except TypeError: - file_.write(data.decode(encoding, "replace")) - else: - file_.flush() - os.write(fileno, data) - finally: - # reset the code page to what we had before - if old_cp is not None: - ctypes.windll.kernel32.SetConsoleOutputCP(old_cp) - - -class OptionParser(optparse.OptionParser): - """OptionParser subclass which supports printing Unicode under Windows""" - - def print_help(self, file=None): - print_(self.format_help(), file=file) diff --git a/libs/mutagen/_util.py b/libs/mutagen/_util.py index f05ff454..1332f9d3 100644 --- a/libs/mutagen/_util.py +++ b/libs/mutagen/_util.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 
-*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Utility classes for Mutagen. @@ -12,13 +12,262 @@ You should not rely on the interfaces here being stable. They are intended for internal use in Mutagen only. """ +import sys import struct import codecs +import errno +import decimal +from io import BytesIO +try: + import mmap +except ImportError: + # Google App Engine has no mmap: + # https://github.com/quodlibet/mutagen/issues/286 + mmap = None + +from collections import namedtuple +from contextlib import contextmanager +from functools import wraps from fnmatch import fnmatchcase from ._compat import chr_, PY2, iteritems, iterbytes, integer_types, xrange, \ - izip + izip, text_type, reraise + + +def intround(value): + """Given a float returns a rounded int. Should give the same result on + both Py2/3 + """ + + return int(decimal.Decimal.from_float( + value).to_integral_value(decimal.ROUND_HALF_EVEN)) + + +def is_fileobj(fileobj): + """Returns: + bool: if an argument passed ot mutagen should be treated as a + file object + """ + + return not (isinstance(fileobj, (text_type, bytes)) or + hasattr(fileobj, "__fspath__")) + + +def verify_fileobj(fileobj, writable=False): + """Verifies that the passed fileobj is a file like object which + we can use. + + Args: + writable (bool): verify that the file object is writable as well + + Raises: + ValueError: In case the object is not a file object that is readable + (or writable if required) or is not opened in bytes mode. 
+ """ + + try: + data = fileobj.read(0) + except Exception: + if not hasattr(fileobj, "read"): + raise ValueError("%r not a valid file object" % fileobj) + raise ValueError("Can't read from file object %r" % fileobj) + + if not isinstance(data, bytes): + raise ValueError( + "file object %r not opened in binary mode" % fileobj) + + if writable: + try: + fileobj.write(b"") + except Exception: + if not hasattr(fileobj, "write"): + raise ValueError("%r not a valid file object" % fileobj) + raise ValueError("Can't write to file object %r" % fileobj) + + +def verify_filename(filename): + """Checks of the passed in filename has the correct type. + + Raises: + ValueError: if not a filename + """ + + if is_fileobj(filename): + raise ValueError("%r not a filename" % filename) + + +def fileobj_name(fileobj): + """ + Returns: + text: A potential filename for a file object. Always a valid + path type, but might be empty or non-existent. + """ + + value = getattr(fileobj, "name", u"") + if not isinstance(value, (text_type, bytes)): + value = text_type(value) + return value + + +def loadfile(method=True, writable=False, create=False): + """A decorator for functions taking a `filething` as a first argument. + + Passes a FileThing instance as the first argument to the wrapped function. + + Args: + method (bool): If the wrapped functions is a method + writable (bool): If a filename is passed opens the file readwrite, if + passed a file object verifies that it is writable. + create (bool): If passed a filename that does not exist will create + a new empty file. 
+ """ + + def convert_file_args(args, kwargs): + filething = args[0] if args else None + filename = kwargs.pop("filename", None) + fileobj = kwargs.pop("fileobj", None) + return filething, filename, fileobj, args[1:], kwargs + + def wrap(func): + + @wraps(func) + def wrapper(self, *args, **kwargs): + filething, filename, fileobj, args, kwargs = \ + convert_file_args(args, kwargs) + with _openfile(self, filething, filename, fileobj, + writable, create) as h: + return func(self, h, *args, **kwargs) + + @wraps(func) + def wrapper_func(*args, **kwargs): + filething, filename, fileobj, args, kwargs = \ + convert_file_args(args, kwargs) + with _openfile(None, filething, filename, fileobj, + writable, create) as h: + return func(h, *args, **kwargs) + + return wrapper if method else wrapper_func + + return wrap + + +def convert_error(exc_src, exc_dest): + """A decorator for reraising exceptions with a different type. + Mostly useful for IOError. + + Args: + exc_src (type): The source exception type + exc_dest (type): The target exception type. + """ + + def wrap(func): + + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exc_dest: + raise + except exc_src as err: + reraise(exc_dest, err, sys.exc_info()[2]) + + return wrapper + + return wrap + + +FileThing = namedtuple("FileThing", ["fileobj", "filename", "name"]) +"""filename is None if the source is not a filename. name is a filename which +can be used for file type detection +""" + + +@contextmanager +def _openfile(instance, filething, filename, fileobj, writable, create): + """yields a FileThing + + Args: + filething: Either a file name, a file object or None + filename: Either a file name or None + fileobj: Either a file object or None + writable (bool): if the file should be opened + create (bool): if the file should be created if it doesn't exist. 
+ implies writable + Raises: + MutagenError: In case opening the file failed + TypeError: in case neither a file name or a file object is passed + """ + + assert not create or writable + + # to allow stacked context managers, just pass the result through + if isinstance(filething, FileThing): + filename = filething.filename + fileobj = filething.fileobj + filething = None + + if filething is not None: + if is_fileobj(filething): + fileobj = filething + elif hasattr(filething, "__fspath__"): + filename = filething.__fspath__() + if not isinstance(filename, (bytes, text_type)): + raise TypeError("expected __fspath__() to return a filename") + else: + filename = filething + + if instance is not None: + # XXX: take "not writable" as loading the file.. + if not writable: + instance.filename = filename + elif filename is None: + filename = getattr(instance, "filename", None) + + if fileobj is not None: + verify_fileobj(fileobj, writable=writable) + yield FileThing(fileobj, filename, filename or fileobj_name(fileobj)) + elif filename is not None: + verify_filename(filename) + + inmemory_fileobj = False + try: + fileobj = open(filename, "rb+" if writable else "rb") + except IOError as e: + if writable and e.errno == errno.EOPNOTSUPP: + # Some file systems (gvfs over fuse) don't support opening + # files read/write. To make things still work read the whole + # file into an in-memory file like object and write it back + # later. 
+ # https://github.com/quodlibet/mutagen/issues/300 + try: + with open(filename, "rb") as fileobj: + fileobj = BytesIO(fileobj.read()) + except IOError as e2: + raise MutagenError(e2) + inmemory_fileobj = True + elif create and e.errno == errno.ENOENT: + assert writable + try: + fileobj = open(filename, "wb+") + except IOError as e2: + raise MutagenError(e2) + else: + raise MutagenError(e) + + with fileobj as fileobj: + yield FileThing(fileobj, filename, filename) + + if inmemory_fileobj: + assert writable + data = fileobj.getvalue() + try: + with open(filename, "wb") as fileobj: + fileobj.write(data) + except IOError as e: + raise MutagenError(e) + else: + raise TypeError("Missing filename or fileobj argument") class MutagenError(Exception): @@ -31,6 +280,11 @@ class MutagenError(Exception): def total_ordering(cls): + """Adds all possible ordering methods to a class. + + Needs a working __eq__ and __lt__ and will supply the rest. + """ + assert "__eq__" in cls.__dict__ assert "__lt__" in cls.__dict__ @@ -60,6 +314,25 @@ def hashable(cls): def enum(cls): + """A decorator for creating an int enum class. + + Makes the values a subclass of the type and implements repr/str. + The new class will be a subclass of int. + + Args: + cls (type): The class to convert to an enum + + Returns: + type: A new class + + :: + + @enum + class Foo(object): + FOO = 1 + BAR = 2 + """ + assert cls.__bases__ == (object,) d = dict(cls.__dict__) @@ -89,6 +362,60 @@ def enum(cls): return new_type +def flags(cls): + """A decorator for creating an int flags class. + + Makes the values a subclass of the type and implements repr/str. + The new class will be a subclass of int. 
+ + Args: + cls (type): The class to convert to an flags + + Returns: + type: A new class + + :: + + @flags + class Foo(object): + FOO = 1 + BAR = 2 + """ + + assert cls.__bases__ == (object,) + + d = dict(cls.__dict__) + new_type = type(cls.__name__, (int,), d) + new_type.__module__ = cls.__module__ + + map_ = {} + for key, value in iteritems(d): + if key.upper() == key and isinstance(value, integer_types): + value_instance = new_type(value) + setattr(new_type, key, value_instance) + map_[value] = key + + def str_(self): + value = int(self) + matches = [] + for k, v in map_.items(): + if value & k: + matches.append("%s.%s" % (type(self).__name__, v)) + value &= ~k + if value != 0 or not matches: + matches.append(text_type(value)) + + return " | ".join(matches) + + def repr_(self): + return "<%s: %d>" % (str(self), int(self)) + + setattr(new_type, "__repr__", repr_) + setattr(new_type, "__str__", str_) + + return new_type + + @total_ordering class DictMixin(object): """Implement the dict API using keys() and __*item__ methods. @@ -244,6 +571,19 @@ def _fill_cdata(cls): if s.size == 1: esuffix = "" bits = str(s.size * 8) + + if unsigned: + max_ = 2 ** (s.size * 8) - 1 + min_ = 0 + else: + max_ = 2 ** (s.size * 8 - 1) - 1 + min_ = - 2 ** (s.size * 8 - 1) + + funcs["%s%s_min" % (prefix, name)] = min_ + funcs["%s%s_max" % (prefix, name)] = max_ + funcs["%sint%s_min" % (prefix, bits)] = min_ + funcs["%sint%s_max" % (prefix, bits)] = max_ + funcs["%s%s%s" % (prefix, name, esuffix)] = unpack funcs["%sint%s%s" % (prefix, bits, esuffix)] = unpack funcs["%s%s%s_from" % (prefix, name, esuffix)] = unpack_from @@ -276,10 +616,15 @@ _fill_cdata(cdata) def get_size(fileobj): - """Returns the size of the file object. The position when passed in will - be preserved if no error occurs. + """Returns the size of the file. + The position when passed in will be preserved if no error occurs. - In case of an error raises IOError. 
+ Args: + fileobj (fileobj) + Returns: + int: The size of the file + Raises: + IOError """ old_pos = fileobj.tell() @@ -290,62 +635,226 @@ def get_size(fileobj): fileobj.seek(old_pos, 0) +def read_full(fileobj, size): + """Like fileobj.read but raises IOError if not all requested data is + returned. + + If you want to distinguish IOError and the EOS case, better handle + the error yourself instead of using this. + + Args: + fileobj (fileobj) + size (int): amount of bytes to read + Raises: + IOError: In case read fails or not enough data is read + """ + + if size < 0: + raise ValueError("size must not be negative") + + data = fileobj.read(size) + if len(data) != size: + raise IOError + return data + + +def seek_end(fileobj, offset): + """Like fileobj.seek(-offset, 2), but will not try to go beyond the start + + Needed since file objects from BytesIO will not raise IOError and + file objects from open() will raise IOError if going to a negative offset. + To make things easier for custom implementations, instead of allowing + both behaviors, we just don't do it. + + Args: + fileobj (fileobj) + offset (int): how many bytes away from the end backwards to seek to + + Raises: + IOError + """ + + if offset < 0: + raise ValueError + + if get_size(fileobj) < offset: + fileobj.seek(0, 0) + else: + fileobj.seek(-offset, 2) + + +def mmap_move(fileobj, dest, src, count): + """Mmaps the file object if possible and moves 'count' data + from 'src' to 'dest'. All data has to be inside the file size + (enlarging the file through this function isn't possible) + + Will adjust the file offset. 
+ + Args: + fileobj (fileobj) + dest (int): The destination offset + src (int): The source offset + count (int) The amount of data to move + Raises: + mmap.error: In case move failed + IOError: In case an operation on the fileobj fails + ValueError: In case invalid parameters were given + """ + + assert mmap is not None, "no mmap support" + + if dest < 0 or src < 0 or count < 0: + raise ValueError("Invalid parameters") + + try: + fileno = fileobj.fileno() + except (AttributeError, IOError): + raise mmap.error( + "File object does not expose/support a file descriptor") + + fileobj.seek(0, 2) + filesize = fileobj.tell() + length = max(dest, src) + count + + if length > filesize: + raise ValueError("Not in file size boundary") + + offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) * + mmap.ALLOCATIONGRANULARITY) + assert dest >= offset + assert src >= offset + assert offset % mmap.ALLOCATIONGRANULARITY == 0 + + # Windows doesn't handle empty mappings, add a fast path here instead + if count == 0: + return + + # fast path + if src == dest: + return + + fileobj.flush() + file_map = mmap.mmap(fileno, length - offset, offset=offset) + try: + file_map.move(dest - offset, src - offset, count) + finally: + file_map.close() + + +def resize_file(fobj, diff, BUFFER_SIZE=2 ** 16): + """Resize a file by `diff`. + + New space will be filled with zeros. + + Args: + fobj (fileobj) + diff (int): amount of size to change + Raises: + IOError + """ + + fobj.seek(0, 2) + filesize = fobj.tell() + + if diff < 0: + if filesize + diff < 0: + raise ValueError + # truncate flushes internally + fobj.truncate(filesize + diff) + elif diff > 0: + try: + while diff: + addsize = min(BUFFER_SIZE, diff) + fobj.write(b"\x00" * addsize) + diff -= addsize + fobj.flush() + except IOError as e: + if e.errno == errno.ENOSPC: + # To reduce the chance of corrupt files in case of missing + # space try to revert the file expansion back. 
Of course + # in reality every in-file-write can also fail due to COW etc. + # Note: IOError gets also raised in flush() due to buffering + fobj.truncate(filesize) + raise + + +def fallback_move(fobj, dest, src, count, BUFFER_SIZE=2 ** 16): + """Moves data around using read()/write(). + + Args: + fileobj (fileobj) + dest (int): The destination offset + src (int): The source offset + count (int) The amount of data to move + Raises: + IOError: In case an operation on the fileobj fails + ValueError: In case invalid parameters were given + """ + + if dest < 0 or src < 0 or count < 0: + raise ValueError + + fobj.seek(0, 2) + filesize = fobj.tell() + + if max(dest, src) + count > filesize: + raise ValueError("area outside of file") + + if src > dest: + moved = 0 + while count - moved: + this_move = min(BUFFER_SIZE, count - moved) + fobj.seek(src + moved) + buf = fobj.read(this_move) + fobj.seek(dest + moved) + fobj.write(buf) + moved += this_move + fobj.flush() + else: + while count: + this_move = min(BUFFER_SIZE, count) + fobj.seek(src + count - this_move) + buf = fobj.read(this_move) + fobj.seek(count + dest - this_move) + fobj.write(buf) + count -= this_move + fobj.flush() + + def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): """Insert size bytes of empty space starting at offset. fobj must be an open file object, open rb+ or equivalent. Mutagen tries to use mmap to resize the file, but falls back to a significantly slower method if mmap fails. 
+ + Args: + fobj (fileobj) + size (int): The amount of space to insert + offset (int): The offset at which to insert the space + Raises: + IOError """ - assert 0 < size - assert 0 <= offset + if size < 0 or offset < 0: + raise ValueError fobj.seek(0, 2) filesize = fobj.tell() movesize = filesize - offset - fobj.write(b'\x00' * size) - fobj.flush() - try: - import mmap - file_map = mmap.mmap(fobj.fileno(), filesize + size) + if movesize < 0: + raise ValueError + + resize_file(fobj, size, BUFFER_SIZE) + + if mmap is not None: try: - file_map.move(offset + size, offset, movesize) - finally: - file_map.close() - except (ValueError, EnvironmentError, ImportError, AttributeError): - # handle broken mmap scenarios, BytesIO() - fobj.truncate(filesize) - - fobj.seek(0, 2) - padsize = size - # Don't generate an enormous string if we need to pad - # the file out several megs. - while padsize: - addsize = min(BUFFER_SIZE, padsize) - fobj.write(b"\x00" * addsize) - padsize -= addsize - - fobj.seek(filesize, 0) - while movesize: - # At the start of this loop, fobj is pointing at the end - # of the data we need to move, which is of movesize length. - thismove = min(BUFFER_SIZE, movesize) - # Seek back however much we're going to read this frame. - fobj.seek(-thismove, 1) - nextpos = fobj.tell() - # Read it, so we're back at the end. - data = fobj.read(thismove) - # Seek back to where we need to write it. - fobj.seek(-thismove + size, 1) - # Write it. - fobj.write(data) - # And seek back to the end of the unmoved data. 
- fobj.seek(nextpos) - movesize -= thismove - - fobj.flush() + mmap_move(fobj, offset + size, offset, movesize) + except mmap.error: + fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) + else: + fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): @@ -354,42 +863,47 @@ def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): fobj must be an open file object, open rb+ or equivalent. Mutagen tries to use mmap to resize the file, but falls back to a significantly slower method if mmap fails. + + Args: + fobj (fileobj) + size (int): The amount of space to delete + offset (int): The start of the space to delete + Raises: + IOError """ - assert 0 < size - assert 0 <= offset + if size < 0 or offset < 0: + raise ValueError fobj.seek(0, 2) filesize = fobj.tell() movesize = filesize - offset - size - assert 0 <= movesize - if movesize > 0: - fobj.flush() + if movesize < 0: + raise ValueError + + if mmap is not None: try: - import mmap - file_map = mmap.mmap(fobj.fileno(), filesize) - try: - file_map.move(offset, offset + size, movesize) - finally: - file_map.close() - except (ValueError, EnvironmentError, ImportError, AttributeError): - # handle broken mmap scenarios, BytesIO() - fobj.seek(offset + size) - buf = fobj.read(BUFFER_SIZE) - while buf: - fobj.seek(offset) - fobj.write(buf) - offset += len(buf) - fobj.seek(offset + size) - buf = fobj.read(BUFFER_SIZE) - fobj.truncate(filesize - size) - fobj.flush() + mmap_move(fobj, offset, offset + size, movesize) + except mmap.error: + fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) + else: + fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) + + resize_file(fobj, -size, BUFFER_SIZE) def resize_bytes(fobj, old_size, new_size, offset): """Resize an area in a file adding and deleting at the end of it. Does nothing if no resizing is needed. 
+ + Args: + fobj (fileobj) + old_size (int): The area starting at offset + new_size (int): The new size of the area + offset (int): The start of the area + Raises: + IOError """ if new_size < old_size: @@ -405,6 +919,15 @@ def resize_bytes(fobj, old_size, new_size, offset): def dict_match(d, key, default=None): """Like __getitem__ but works as if the keys() are all filename patterns. Returns the value of any dict key that matches the passed key. + + Args: + d (dict): A dict with filename patterns as keys + key (str): A key potentially matching any of the keys + default (object): The object to return if no pattern matched the + passed in key + Returns: + object: The dict value where the dict key matched the passed in key. + Or default if there was no match. """ if key in d and "[" not in key: @@ -416,15 +939,57 @@ def dict_match(d, key, default=None): return default +def encode_endian(text, encoding, errors="strict", le=True): + """Like text.encode(encoding) but always returns little endian/big endian + BOMs instead of the system one. + + Args: + text (text) + encoding (str) + errors (str) + le (boolean): if little endian + Returns: + bytes + Raises: + UnicodeEncodeError + LookupError + """ + + encoding = codecs.lookup(encoding).name + + if encoding == "utf-16": + if le: + return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors) + else: + return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors) + elif encoding == "utf-32": + if le: + return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors) + else: + return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors) + else: + return text.encode(encoding, errors) + + def decode_terminated(data, encoding, strict=True): """Returns the decoded data until the first NULL terminator and all data after it. - In case the data can't be decoded raises UnicodeError. - In case the encoding is not found raises LookupError. 
- In case the data isn't null terminated (even if it is encoded correctly) - raises ValueError except if strict is False, then the decoded string - will be returned anyway. + Args: + data (bytes): data to decode + encoding (str): The codec to use + strict (bool): If True will raise ValueError in case no NULL is found + but the available data decoded successfully. + Returns: + Tuple[`text`, `bytes`]: A tuple containing the decoded text and the + remaining data after the found NULL termination. + + Raises: + UnicodeError: In case the data can't be decoded. + LookupError:In case the encoding is not found. + ValueError: In case the data isn't null terminated (even if it is + encoded correctly) except if strict is False, then the decoded + string will be returned anyway. """ codec_info = codecs.lookup(encoding) diff --git a/libs/mutagen/_vorbis.py b/libs/mutagen/_vorbis.py index 17634e06..f8b0ee7a 100644 --- a/libs/mutagen/_vorbis.py +++ b/libs/mutagen/_vorbis.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005-2006 Joe Wreschnig # 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Vorbis comment data. @@ -20,7 +20,7 @@ import sys import mutagen from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2 -from mutagen._util import DictMixin, cdata +from mutagen._util import DictMixin, cdata, MutagenError def is_valid_key(key): @@ -45,7 +45,7 @@ def is_valid_key(key): istag = is_valid_key -class error(IOError): +class error(MutagenError): pass @@ -68,8 +68,7 @@ class VComment(mutagen.Tags, list): file-like object, not a filename. Attributes: - - * vendor -- the stream 'vendor' (i.e. 
writer); default 'Mutagen' + vendor (text): the stream 'vendor' (i.e. writer); default 'Mutagen' """ vendor = u"Mutagen " + mutagen.version_string @@ -91,12 +90,11 @@ class VComment(mutagen.Tags, list): def load(self, fileobj, errors='replace', framing=True): """Parse a Vorbis comment from a file-like object. - Keyword arguments: - - * errors: - 'strict', 'replace', or 'ignore'. This affects Unicode decoding - and how other malformed content is interpreted. - * framing -- if true, fail if a framing bit is not present + Arguments: + errors (str): 'strict', 'replace', or 'ignore'. + This affects Unicode decoding and how other malformed content + is interpreted. + framing (bool): if true, fail if a framing bit is not present Framing bits are required by the Vorbis comment specification, but are not used in FLAC Vorbis comment blocks. @@ -169,7 +167,7 @@ class VComment(mutagen.Tags, list): try: value.decode("utf-8") - except: + except Exception: raise ValueError("%r is not a valid value" % value) return True @@ -186,9 +184,8 @@ class VComment(mutagen.Tags, list): Validation is always performed, so calling this function on invalid data may raise a ValueError. - Keyword arguments: - - * framing -- if true, append a framing bit (see load) + Arguments: + framing (bool): if true, append a framing bit (see load) """ self.validate() diff --git a/libs/mutagen/aac.py b/libs/mutagen/aac.py index 83968a05..fa6f7064 100644 --- a/libs/mutagen/aac.py +++ b/libs/mutagen/aac.py @@ -2,8 +2,9 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
""" * ADTS - Audio Data Transport Stream @@ -13,7 +14,9 @@ from mutagen import StreamInfo from mutagen._file import FileType -from mutagen._util import BitReader, BitReaderError, MutagenError +from mutagen._util import BitReader, BitReaderError, MutagenError, loadfile, \ + convert_error +from mutagen.id3._util import BitPaddedInt from mutagen._compat import endswith, xrange @@ -262,16 +265,16 @@ class AACError(MutagenError): class AACInfo(StreamInfo): - """AAC stream information. + """AACInfo() + + AAC stream information. + The length of the stream is just a guess and might not be correct. Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bitrate -- audio bitrate, in bits per second - - The length of the stream is just a guess and might not be correct. + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bitrate (`int`): audio bitrate, in bits per second """ channels = 0 @@ -279,11 +282,13 @@ class AACInfo(StreamInfo): sample_rate = 0 bitrate = 0 + @convert_error(IOError, AACError) def __init__(self, fileobj): + """Raises AACError""" + # skip id3v2 header start_offset = 0 header = fileobj.read(10) - from mutagen.id3 import BitPaddedInt if header.startswith(b"ID3"): size = BitPaddedInt(header[6:]) start_offset = size + 10 @@ -379,18 +384,25 @@ class AACInfo(StreamInfo): class AAC(FileType): - """Load ADTS or ADIF streams containing AAC. + """AAC(filething) + + Arguments: + filething (filething) + + Load ADTS or ADIF streams containing AAC. Tagging is not supported. Use the ID3/APEv2 classes directly instead. 
+ + Attributes: + info (`AACInfo`) """ _mimes = ["audio/x-aac"] - def load(self, filename): - self.filename = filename - with open(filename, "rb") as h: - self.info = AACInfo(h) + @loadfile() + def load(self, filething): + self.info = AACInfo(filething.fileobj) def add_tags(self): raise AACError("doesn't support tags") diff --git a/libs/mutagen/aiff.py b/libs/mutagen/aiff.py index dc580063..66ec3af0 100644 --- a/libs/mutagen/aiff.py +++ b/libs/mutagen/aiff.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2014 Evan Purkhiser # 2014 Ben Ockmore # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """AIFF audio stream information and tags.""" @@ -18,16 +18,17 @@ from mutagen import StreamInfo, FileType from mutagen.id3 import ID3 from mutagen.id3._util import ID3NoHeaderError, error as ID3Error -from mutagen._util import resize_bytes, delete_bytes, MutagenError +from mutagen._util import resize_bytes, delete_bytes, MutagenError, loadfile, \ + convert_error __all__ = ["AIFF", "Open", "delete"] -class error(MutagenError, RuntimeError): +class error(MutagenError): pass -class InvalidChunk(error, IOError): +class InvalidChunk(error): pass @@ -42,6 +43,14 @@ def is_valid_chunk_id(id): (max(id) <= u'~')) +def assert_valid_chunk_id(id): + + assert isinstance(id, text_type) + + if not is_valid_chunk_id(id): + raise ValueError("AIFF key must be four ASCII characters.") + + def read_float(data): # 10 bytes expon, himant, lomant = struct.unpack('>hLL', data) sign = 1 @@ -166,44 +175,32 @@ class IFFFile(object): def __contains__(self, id_): """Check if the IFF file contains a specific chunk""" - assert isinstance(id_, text_type) - - if not 
is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") + assert_valid_chunk_id(id_) return id_ in self.__chunks def __getitem__(self, id_): """Get a chunk from the IFF file""" - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") + assert_valid_chunk_id(id_) try: return self.__chunks[id_] except KeyError: raise KeyError( - "%r has no %r chunk" % (self.__fileobj.name, id_)) + "%r has no %r chunk" % (self.__fileobj, id_)) def __delitem__(self, id_): """Remove a chunk from the IFF file""" - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") + assert_valid_chunk_id(id_) self.__chunks.pop(id_).delete() def insert_chunk(self, id_): """Insert a new chunk at the end of the IFF file""" - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") + assert_valid_chunk_id(id_) self.__fileobj.seek(self.__next_offset) self.__fileobj.write(pack('>4si', id_.ljust(4).encode('ascii'), 0)) @@ -216,17 +213,18 @@ class IFFFile(object): class AIFFInfo(StreamInfo): - """AIFF audio stream information. + """AIFFInfo() + + AIFF audio stream information. 
Information is parsed from the COMM chunk of the AIFF file - Useful attributes: - - * length -- audio length, in seconds - * bitrate -- audio bitrate, in bits per second - * channels -- The number of audio channels - * sample_rate -- audio sample rate, in Hz - * sample_size -- The audio sample size + Attributes: + length (`float`): audio length, in seconds + bitrate (`int`): audio bitrate, in bits per second + channels (`int`): The number of audio channels + sample_rate (`int`): audio sample rate, in Hz + sample_size (`int`): The audio sample size """ length = 0 @@ -234,7 +232,10 @@ class AIFFInfo(StreamInfo): channels = 0 sample_rate = 0 + @convert_error(IOError, error) def __init__(self, fileobj): + """Raises error""" + iff = IFFFile(fileobj) try: common_chunk = iff[u'COMM'] @@ -242,6 +243,8 @@ class AIFFInfo(StreamInfo): raise error(str(e)) data = common_chunk.read() + if len(data) < 18: + raise error info = struct.unpack('>hLh10s', data[:18]) channels, frame_count, sample_size, sample_rate = info @@ -266,61 +269,65 @@ class _IFFID3(ID3): except (InvalidChunk, KeyError): raise ID3NoHeaderError("No ID3 chunk") - def save(self, filename=None, v2_version=4, v23_sep='/', padding=None): + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, v2_version=4, v23_sep='/', padding=None): """Save ID3v2 data to the AIFF file""" - if filename is None: - filename = self.filename + fileobj = filething.fileobj - # Unlike the parent ID3.save method, we won't save to a blank file - # since we would have to construct a empty AIFF file - with open(filename, 'rb+') as fileobj: - iff_file = IFFFile(fileobj) + iff_file = IFFFile(fileobj) - if u'ID3' not in iff_file: - iff_file.insert_chunk(u'ID3') + if u'ID3' not in iff_file: + iff_file.insert_chunk(u'ID3') - chunk = iff_file[u'ID3'] + chunk = iff_file[u'ID3'] - try: - data = self._prepare_data( - fileobj, chunk.data_offset, chunk.data_size, v2_version, - v23_sep, padding) - except ID3Error as e: - 
reraise(error, e, sys.exc_info()[2]) + try: + data = self._prepare_data( + fileobj, chunk.data_offset, chunk.data_size, v2_version, + v23_sep, padding) + except ID3Error as e: + reraise(error, e, sys.exc_info()[2]) - new_size = len(data) - new_size += new_size % 2 # pad byte - assert new_size % 2 == 0 - chunk.resize(new_size) - data += (new_size - len(data)) * b'\x00' - assert new_size == len(data) - chunk.write(data) + new_size = len(data) + new_size += new_size % 2 # pad byte + assert new_size % 2 == 0 + chunk.resize(new_size) + data += (new_size - len(data)) * b'\x00' + assert new_size == len(data) + chunk.write(data) - def delete(self, filename=None): + @loadfile(writable=True) + def delete(self, filething): """Completely removes the ID3 chunk from the AIFF file""" - if filename is None: - filename = self.filename - delete(filename) + delete(filething) self.clear() -def delete(filename): +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): """Completely removes the ID3 chunk from the AIFF file""" - with open(filename, "rb+") as file_: - try: - del IFFFile(file_)[u'ID3'] - except KeyError: - pass + try: + del IFFFile(filething.fileobj)[u'ID3'] + except KeyError: + pass class AIFF(FileType): - """An AIFF audio file. + """AIFF(filething) - :ivar info: :class:`AIFFInfo` - :ivar tags: :class:`ID3` + An AIFF audio file. 
+ + Arguments: + filething (filething) + + Attributes: + tags (`mutagen.id3.ID3`) + info (`AIFFInfo`) """ _mimes = ["audio/aiff", "audio/x-aiff"] @@ -339,19 +346,24 @@ class AIFF(FileType): else: raise error("an ID3 tag already exists") - def load(self, filename, **kwargs): + @convert_error(IOError, error) + @loadfile() + def load(self, filething, **kwargs): """Load stream and tag information from a file.""" - self.filename = filename + + fileobj = filething.fileobj try: - self.tags = _IFFID3(filename, **kwargs) + self.tags = _IFFID3(fileobj, **kwargs) except ID3NoHeaderError: self.tags = None except ID3Error as e: raise error(e) + else: + self.tags.filename = self.filename - with open(filename, "rb") as fileobj: - self.info = AIFFInfo(fileobj) + fileobj.seek(0, 0) + self.info = AIFFInfo(fileobj) Open = AIFF diff --git a/libs/mutagen/apev2.py b/libs/mutagen/apev2.py index 3b79aba9..8789d634 100644 --- a/libs/mutagen/apev2.py +++ b/libs/mutagen/apev2.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """APEv2 reading and writing. 
@@ -37,8 +37,8 @@ from collections import MutableSequence from ._compat import (cBytesIO, PY3, text_type, PY2, reraise, swap_to_string, xrange) from mutagen import Metadata, FileType, StreamInfo -from mutagen._util import (DictMixin, cdata, delete_bytes, total_ordering, - MutagenError) +from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering, \ + MutagenError, loadfile, convert_error, seek_end, get_size def is_valid_apev2_key(key): @@ -68,19 +68,19 @@ HAS_NO_FOOTER = 1 << 30 IS_HEADER = 1 << 29 -class error(IOError, MutagenError): +class error(MutagenError): pass -class APENoHeaderError(error, ValueError): +class APENoHeaderError(error): pass -class APEUnsupportedVersionError(error, ValueError): +class APEUnsupportedVersionError(error): pass -class APEBadItemError(error, ValueError): +class APEBadItemError(error): pass @@ -103,6 +103,8 @@ class _APEv2Data(object): is_at_start = False def __init__(self, fileobj): + """Raises IOError and apev2.error""" + self.__find_metadata(fileobj) if self.header is None: @@ -137,6 +139,8 @@ class _APEv2Data(object): # Check for an APEv2 tag followed by an ID3v1 tag at the end. try: + if get_size(fileobj) < 128: + raise IOError fileobj.seek(-128, 2) if fileobj.read(3) == b"TAG": @@ -173,11 +177,18 @@ class _APEv2Data(object): self.header = 0 def __fill_missing(self, fileobj): + """Raises IOError and apev2.error""" + fileobj.seek(self.metadata + 8) - self.version = fileobj.read(4) - self.size = cdata.uint_le(fileobj.read(4)) - self.items = cdata.uint_le(fileobj.read(4)) - self.flags = cdata.uint_le(fileobj.read(4)) + + data = fileobj.read(16) + if len(data) != 16: + raise error + + self.version = data[:4] + self.size = cdata.uint32_le(data[4:8]) + self.items = cdata.uint32_le(data[8:12]) + self.flags = cdata.uint32_le(data[12:]) if self.header is not None: self.data = self.header + 32 @@ -256,7 +267,9 @@ class _CIDictProxy(DictMixin): class APEv2(_CIDictProxy, Metadata): - """A file with an APEv2 tag. 
+ """APEv2(filething=None) + + A file with an APEv2 tag. ID3v1 tags are silently ignored and overwritten. """ @@ -269,12 +282,15 @@ class APEv2(_CIDictProxy, Metadata): items = sorted(self.items()) return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items) - def load(self, filename): - """Load tags from a filename.""" + @convert_error(IOError, error) + @loadfile() + def load(self, filething): + """Load tags from a filename. - self.filename = filename - with open(filename, "rb") as fileobj: - data = _APEv2Data(fileobj) + Raises apev2.error + """ + + data = _APEv2Data(filething.fileobj) if data.tag: self.clear() @@ -283,33 +299,45 @@ class APEv2(_CIDictProxy, Metadata): raise APENoHeaderError("No APE tag found") def __parse_tag(self, tag, count): + """Raises IOError and APEBadItemError""" + fileobj = cBytesIO(tag) for i in xrange(count): - size_data = fileobj.read(4) + tag_data = fileobj.read(8) # someone writes wrong item counts - if not size_data: + if not tag_data: break - size = cdata.uint_le(size_data) - flags = cdata.uint_le(fileobj.read(4)) + if len(tag_data) != 8: + raise error + size = cdata.uint32_le(tag_data[:4]) + flags = cdata.uint32_le(tag_data[4:8]) # Bits 1 and 2 bits are flags, 0-3 # Bit 0 is read/write flag, ignored kind = (flags & 6) >> 1 if kind == 3: raise APEBadItemError("value type must be 0, 1, or 2") + key = value = fileobj.read(1) + if not key: + raise APEBadItemError while key[-1:] != b'\x00' and value: value = fileobj.read(1) + if not value: + raise APEBadItemError key += value if key[-1:] == b"\x00": key = key[:-1] + if PY3: try: key = key.decode("ascii") except UnicodeError as err: reraise(APEBadItemError, err, sys.exc_info()[2]) value = fileobj.read(size) + if len(value) != size: + raise APEBadItemError value = _get_value_type(kind)._new(value) @@ -389,7 +417,9 @@ class APEv2(_CIDictProxy, Metadata): super(APEv2, self).__setitem__(key, value) - def save(self, filename=None): + @convert_error(IOError, error) + 
@loadfile(writable=True, create=True) + def save(self, filething): """Save changes to a file. If no filename is given, the one most recently loaded is used. @@ -398,11 +428,8 @@ class APEv2(_CIDictProxy, Metadata): a header and a footer. """ - filename = filename or self.filename - try: - fileobj = open(filename, "r+b") - except IOError: - fileobj = open(filename, "w+b") + fileobj = filething.fileobj + data = _APEv2Data(fileobj) if data.is_at_start: @@ -432,7 +459,7 @@ class APEv2(_CIDictProxy, Metadata): # "APE tags items should be sorted ascending by size... This is # not a MUST, but STRONGLY recommended. Actually the items should # be sorted by importance/byte, but this is not feasible." - tags.sort(key=len) + tags.sort(key=lambda tag: (len(tag), tag)) num_tags = len(tags) tags = b"".join(tags) @@ -451,30 +478,41 @@ class APEv2(_CIDictProxy, Metadata): footer += b"\0" * 8 fileobj.write(footer) - fileobj.close() - def delete(self, filename=None): + @convert_error(IOError, error) + @loadfile(writable=True) + def delete(self, filething): """Remove tags from a file.""" - filename = filename or self.filename - with open(filename, "r+b") as fileobj: - data = _APEv2Data(fileobj) - if data.start is not None and data.size is not None: - delete_bytes(fileobj, data.end - data.start, data.start) - + fileobj = filething.fileobj + data = _APEv2Data(fileobj) + if data.start is not None and data.size is not None: + delete_bytes(fileobj, data.end - data.start, data.start) self.clear() Open = APEv2 -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """delete(filething) + + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ try: - APEv2(filename).delete() + t = APEv2(filething) except APENoHeaderError: - pass + return + filething.fileobj.seek(0) + t.delete(filething) def _get_value_type(kind): @@ -675,6 +713,15 @@ class APEExtValue(_APEUtf8Value): class APEv2File(FileType): + """APEv2File(filething) + + Arguments: + filething (filething) + + Attributes: + tags (`APEv2`) + """ + class _Info(StreamInfo): length = 0 bitrate = 0 @@ -686,11 +733,18 @@ class APEv2File(FileType): def pprint(): return u"Unknown format with APEv2 tag." - def load(self, filename): - self.filename = filename - self.info = self._Info(open(filename, "rb")) + @loadfile() + def load(self, filething): + fileobj = filething.fileobj + + self.info = self._Info(fileobj) try: - self.tags = APEv2(filename) + fileobj.seek(0, 0) + except IOError as e: + raise error(e) + + try: + self.tags = APEv2(fileobj) except APENoHeaderError: self.tags = None @@ -703,8 +757,8 @@ class APEv2File(FileType): @staticmethod def score(filename, fileobj, header): try: - fileobj.seek(-160, 2) + seek_end(fileobj, 160) + footer = fileobj.read() except IOError: - fileobj.seek(0) - footer = fileobj.read() + return -1 return ((b"APETAGEX" in footer) - header.startswith(b"ID3")) diff --git a/libs/mutagen/asf/__init__.py b/libs/mutagen/asf/__init__.py index 7d37a86c..32e1bedb 100644 --- a/libs/mutagen/asf/__init__.py +++ b/libs/mutagen/asf/__init__.py @@ -3,15 +3,16 @@ # Copyright (C) 2006-2007 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
"""Read and write ASF (Window Media Audio) files.""" __all__ = ["ASF", "Open"] from mutagen import FileType, Tags, StreamInfo -from mutagen._util import resize_bytes, DictMixin +from mutagen._util import resize_bytes, DictMixin, loadfile, convert_error from mutagen._compat import string_types, long_, PY3, izip from ._util import error, ASFError, ASFHeaderError @@ -28,35 +29,31 @@ error, ASFError, ASFHeaderError, ASFValue class ASFInfo(StreamInfo): - """ASF stream information.""" + """ASFInfo() + + ASF stream information. + + Attributes: + length (`float`): "Length in seconds + sample_rate (`int`): Sample rate in Hz + bitrate (`int`): Bitrate in bps + channels (`int`): Number of channels + codec_type (`mutagen.text`): Name of the codec type of the first + audio stream or an empty string if unknown. Example: + ``Windows Media Audio 9 Standard`` + codec_name (`mutagen.text`): Name and maybe version of the codec used. + Example: ``Windows Media Audio 9.1`` + codec_description (`mutagen.text`): Further information on the codec + used. Example: ``64 kbps, 48 kHz, stereo 2-pass CBR`` + """ length = 0.0 - """Length in seconds (`float`)""" - sample_rate = 0 - """Sample rate in Hz (`int`)""" - bitrate = 0 - """Bitrate in bps (`int`)""" - channels = 0 - """Number of channels (`int`)""" - codec_type = u"" - """Name of the codec type of the first audio stream or - an empty string if unknown. Example: ``Windows Media Audio 9 Standard`` - (:class:`mutagen.text`) - """ - codec_name = u"" - """Name and maybe version of the codec used. Example: - ``Windows Media Audio 9.1`` (:class:`mutagen.text`) - """ - codec_description = u"" - """Further information on the codec used. 
- Example: ``64 kbps, 48 kHz, stereo 2-pass CBR`` (:class:`mutagen.text`) - """ def __init__(self): self.length = 0.0 @@ -68,9 +65,8 @@ class ASFInfo(StreamInfo): self.codec_description = u"" def pprint(self): - """Returns a stream information text summary - - :rtype: text + """Returns: + text: a stream information text summary """ s = u"ASF (%s) %d bps, %s Hz, %d channels, %.2f seconds" % ( @@ -80,7 +76,10 @@ class ASFInfo(StreamInfo): class ASFTags(list, DictMixin, Tags): - """Dictionary containing ASF attributes.""" + """ASFTags() + + Dictionary containing ASF attributes. + """ def __getitem__(self, key): """A list of values for the key. @@ -206,50 +205,65 @@ GUID = ASFGUIDAttribute.TYPE class ASF(FileType): - """An ASF file, probably containing WMA or WMV. + """ASF(filething) - :param filename: a filename to load - :raises mutagen.asf.error: In case loading fails + An ASF file, probably containing WMA or WMV. + + Arguments: + filething (filething) + + Attributes: + info (`ASFInfo`) + tags (`ASFTags`) """ _mimes = ["audio/x-ms-wma", "audio/x-ms-wmv", "video/x-ms-asf", "audio/x-wma", "video/x-wmv"] info = None - """A `ASFInfo` instance""" - tags = None - """A `ASFTags` instance""" - def load(self, filename): - self.filename = filename + @convert_error(IOError, error) + @loadfile() + def load(self, filething): + """load(filething) + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + fileobj = filething.fileobj + self.info = ASFInfo() self.tags = ASFTags() - with open(filename, "rb") as fileobj: - self._tags = {} + self._tags = {} + self._header = HeaderObject.parse_full(self, fileobj) - self._header = HeaderObject.parse_full(self, fileobj) + for guid in [ContentDescriptionObject.GUID, + ExtendedContentDescriptionObject.GUID, + MetadataObject.GUID, + MetadataLibraryObject.GUID]: + self.tags.extend(self._tags.pop(guid, [])) - for guid in [ContentDescriptionObject.GUID, - ExtendedContentDescriptionObject.GUID, MetadataObject.GUID, - 
MetadataLibraryObject.GUID]: - self.tags.extend(self._tags.pop(guid, [])) + assert not self._tags - assert not self._tags + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, padding=None): + """save(filething=None, padding=None) - def save(self, filename=None, padding=None): - """Save tag changes back to the loaded file. + Save tag changes back to the loaded file. - :param padding: A callback which returns the amount of padding to use. - See :class:`mutagen.PaddingInfo` - - :raises mutagen.asf.error: In case saving fails + Args: + filething (filething) + padding (:obj:`mutagen.PaddingFunction`) + Raises: + mutagen.MutagenError """ - if filename is not None and filename != self.filename: - raise ValueError("saving to another file not supported atm") - # Move attributes to the right objects self.to_content_description = {} self.to_extended_content_description = {} @@ -292,25 +306,30 @@ class ASF(FileType): if header_ext.get_child(MetadataLibraryObject.GUID) is None: header_ext.objects.append(MetadataLibraryObject()) + fileobj = filething.fileobj # Render to file - with open(self.filename, "rb+") as fileobj: - old_size = header.parse_size(fileobj)[0] - data = header.render_full(self, fileobj, old_size, padding) - size = len(data) - resize_bytes(fileobj, old_size, size, 0) - fileobj.seek(0) - fileobj.write(data) + old_size = header.parse_size(fileobj)[0] + data = header.render_full(self, fileobj, old_size, padding) + size = len(data) + resize_bytes(fileobj, old_size, size, 0) + fileobj.seek(0) + fileobj.write(data) def add_tags(self): raise ASFError - def delete(self, filename=None): + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) - if filename is not None and filename != self.filename: - raise ValueError("saving to another file not supported atm") + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ self.tags.clear() - self.save(padding=lambda x: 0) + self.save(filething, 
padding=lambda x: 0) @staticmethod def score(filename, fileobj, header): diff --git a/libs/mutagen/asf/_attrs.py b/libs/mutagen/asf/_attrs.py index 4621c9fa..8111c1c2 100644 --- a/libs/mutagen/asf/_attrs.py +++ b/libs/mutagen/asf/_attrs.py @@ -3,8 +3,9 @@ # Copyright (C) 2006-2007 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import sys import struct @@ -35,7 +36,7 @@ class ASFBaseAttribute(object): stream=None, **kwargs): self.language = language self.stream = stream - if data: + if data is not None: self.value = self.parse(data, **kwargs) else: if value is None: diff --git a/libs/mutagen/asf/_objects.py b/libs/mutagen/asf/_objects.py index 001c58a1..1c15d613 100644 --- a/libs/mutagen/asf/_objects.py +++ b/libs/mutagen/asf/_objects.py @@ -3,8 +3,9 @@ # Copyright (C) 2006-2007 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct diff --git a/libs/mutagen/asf/_util.py b/libs/mutagen/asf/_util.py index 42154bff..45ade0e2 100644 --- a/libs/mutagen/asf/_util.py +++ b/libs/mutagen/asf/_util.py @@ -3,15 +3,16 @@ # Copyright (C) 2006-2007 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct from mutagen._util import MutagenError -class error(IOError, MutagenError): +class error(MutagenError): """Error raised by :mod:`mutagen.asf`""" @@ -34,7 +35,7 @@ def guid2bytes(s): p("<IHH", int(s[:8], 16), int(s[9:13], 16), int(s[14:18], 16)), p(">H", int(s[19:23], 16)), p(">Q", int(s[24:], 16))[2:], - ]) + ]) def bytes2guid(s): diff --git a/libs/mutagen/dsf.py b/libs/mutagen/dsf.py new file mode 100644 index 00000000..ed5faae2 --- /dev/null +++ b/libs/mutagen/dsf.py @@ -0,0 +1,358 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2017 Boris Pruessmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""Read and write DSF audio stream information and tags.""" + + +import sys +import struct + +from ._compat import cBytesIO, reraise, endswith + +from mutagen import FileType, StreamInfo +from mutagen._util import cdata, MutagenError, loadfile, convert_error +from mutagen.id3 import ID3 +from mutagen.id3._util import ID3NoHeaderError, error as ID3Error + + +__all__ = ["DSF", "Open", "delete"] + + +class error(MutagenError): + pass + + +class DSFChunk(object): + """A generic chunk of a DSFFile.""" + + chunk_offset = 0 + chunk_header = " " + chunk_size = -1 + + def __init__(self, fileobj, create=False): + self.fileobj = fileobj + + if not create: + self.chunk_offset = fileobj.tell() + self.load() + + def load(self): + raise NotImplementedError + + def write(self): + raise NotImplementedError + + +class DSDChunk(DSFChunk): + """Represents the first chunk of a DSF file""" + + CHUNK_SIZE = 28 + + total_size = 0 + offset_metdata_chunk = 0 + + def __init__(self, fileobj, create=False): + super(DSDChunk, self).__init__(fileobj, create) + + if create: + self.chunk_header = b"DSD " + self.chunk_size = DSDChunk.CHUNK_SIZE + + def load(self): + data = self.fileobj.read(DSDChunk.CHUNK_SIZE) + if len(data) != DSDChunk.CHUNK_SIZE: + raise error("DSF chunk truncated") + + self.chunk_header = data[0:4] + if self.chunk_header != b"DSD ": + raise error("DSF dsd header not found") + + self.chunk_size = cdata.ulonglong_le(data[4:12]) + if self.chunk_size != DSDChunk.CHUNK_SIZE: + raise error("DSF dsd header size mismatch") + + self.total_size = cdata.ulonglong_le(data[12:20]) + self.offset_metdata_chunk = cdata.ulonglong_le(data[20:28]) + + def write(self): + f = cBytesIO() + f.write(self.chunk_header) + f.write(struct.pack("<Q", DSDChunk.CHUNK_SIZE)) + f.write(struct.pack("<Q", self.total_size)) + f.write(struct.pack("<Q", self.offset_metdata_chunk)) + + self.fileobj.seek(self.chunk_offset) + self.fileobj.write(f.getvalue()) + + def pprint(self): + return (u"DSD Chunk (Total file 
size = %d, " + u"Pointer to Metadata chunk = %d)" % ( + self.total_size, self.offset_metdata_chunk)) + + +class FormatChunk(DSFChunk): + + CHUNK_SIZE = 52 + + VERSION = 1 + + FORMAT_DSD_RAW = 0 + """Format ID: DSD Raw""" + + format_version = VERSION + format_id = FORMAT_DSD_RAW + channel_type = 1 + channel_num = 1 + sampling_frequency = 2822400 + bits_per_sample = 1 + sample_count = 0 + block_size_per_channel = 4096 + + def __init__(self, fileobj, create=False): + super(FormatChunk, self).__init__(fileobj, create) + + if create: + self.chunk_header = b"fmt " + self.chunk_size = FormatChunk.CHUNK_SIZE + + def load(self): + data = self.fileobj.read(FormatChunk.CHUNK_SIZE) + if len(data) != FormatChunk.CHUNK_SIZE: + raise error("DSF chunk truncated") + + self.chunk_header = data[0:4] + if self.chunk_header != b"fmt ": + raise error("DSF fmt header not found") + + self.chunk_size = cdata.ulonglong_le(data[4:12]) + if self.chunk_size != FormatChunk.CHUNK_SIZE: + raise error("DSF dsd header size mismatch") + + self.format_version = cdata.uint_le(data[12:16]) + if self.format_version != FormatChunk.VERSION: + raise error("Unsupported format version") + + self.format_id = cdata.uint_le(data[16:20]) + if self.format_id != FormatChunk.FORMAT_DSD_RAW: + raise error("Unsupported format ID") + + self.channel_type = cdata.uint_le(data[20:24]) + self.channel_num = cdata.uint_le(data[24:28]) + self.sampling_frequency = cdata.uint_le(data[28:32]) + self.bits_per_sample = cdata.uint_le(data[32:36]) + self.sample_count = cdata.ulonglong_le(data[36:44]) + + def pprint(self): + return u"fmt Chunk (Channel Type = %d, Channel Num = %d, " \ + u"Sampling Frequency = %d, %.2f seconds)" % \ + (self.channel_type, self.channel_num, self.sampling_frequency, + self.length) + + +class DataChunk(DSFChunk): + + CHUNK_SIZE = 12 + + data = "" + + def __init__(self, fileobj, create=False): + super(DataChunk, self).__init__(fileobj, create) + + if create: + self.chunk_header = b"data" + self.chunk_size 
= DataChunk.CHUNK_SIZE + + def load(self): + data = self.fileobj.read(DataChunk.CHUNK_SIZE) + if len(data) != DataChunk.CHUNK_SIZE: + raise error("DSF chunk truncated") + + self.chunk_header = data[0:4] + if self.chunk_header != b"data": + raise error("DSF data header not found") + + self.chunk_size = cdata.ulonglong_le(data[4:12]) + if self.chunk_size < DataChunk.CHUNK_SIZE: + raise error("DSF data header size mismatch") + + def pprint(self): + return u"data Chunk (Chunk Offset = %d, Chunk Size = %d)" % ( + self.chunk_offset, self.chunk_size) + + +class _DSFID3(ID3): + """A DSF file with ID3v2 tags""" + + @convert_error(IOError, error) + def _pre_load_header(self, fileobj): + fileobj.seek(0) + id3_location = DSDChunk(fileobj).offset_metdata_chunk + if id3_location == 0: + raise ID3NoHeaderError("File has no existing ID3 tag") + + fileobj.seek(id3_location) + + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, v2_version=4, v23_sep='/', padding=None): + """Save ID3v2 data to the DSF file""" + + fileobj = filething.fileobj + fileobj.seek(0) + + dsd_header = DSDChunk(fileobj) + if dsd_header.offset_metdata_chunk == 0: + # create a new ID3 chunk at the end of the file + fileobj.seek(0, 2) + + # store reference to ID3 location + dsd_header.offset_metdata_chunk = fileobj.tell() + dsd_header.write() + + try: + data = self._prepare_data( + fileobj, dsd_header.offset_metdata_chunk, self.size, + v2_version, v23_sep, padding) + except ID3Error as e: + reraise(error, e, sys.exc_info()[2]) + + fileobj.seek(dsd_header.offset_metdata_chunk) + fileobj.write(data) + fileobj.truncate() + + # Update total file size + dsd_header.total_size = fileobj.tell() + dsd_header.write() + + +class DSFInfo(StreamInfo): + """DSF audio stream information. + + Information is parsed from the fmt chunk of the DSF file. + + Attributes: + length (`float`): audio length, in seconds. + channels (`int`): The number of audio channels. 
+ sample_rate (`int`): + Sampling frequency, in Hz. + (2822400, 5644800, 11289600, or 22579200) + bits_per_sample (`int`): The audio sample size. + bitrate (`int`): The audio bitrate. + """ + + def __init__(self, fmt_chunk): + self.fmt_chunk = fmt_chunk + + @property + def length(self): + return float(self.fmt_chunk.sample_count) / self.sample_rate + + @property + def channels(self): + return self.fmt_chunk.channel_num + + @property + def sample_rate(self): + return self.fmt_chunk.sampling_frequency + + @property + def bits_per_sample(self): + return self.fmt_chunk.bits_per_sample + + @property + def bitrate(self): + return self.sample_rate * self.bits_per_sample * self.channels + + def pprint(self): + return u"%d channel DSF @ %d bits, %s Hz, %.2f seconds" % ( + self.channels, self.bits_per_sample, self.sample_rate, self.length) + + +class DSFFile(object): + + dsd_chunk = None + fmt_chunk = None + data_chunk = None + + def __init__(self, fileobj): + self.dsd_chunk = DSDChunk(fileobj) + self.fmt_chunk = FormatChunk(fileobj) + self.data_chunk = DataChunk(fileobj) + + +class DSF(FileType): + """An DSF audio file. 
+ + Arguments: + filething (filething) + + Attributes: + info (`DSFInfo`) + tags (`mutagen.id3.ID3Tags` or `None`) + """ + + _mimes = ["audio/dsf"] + + @staticmethod + def score(filename, fileobj, header): + return header.startswith(b"DSD ") * 2 + \ + endswith(filename.lower(), ".dsf") + + def add_tags(self): + """Add a DSF tag block to the file.""" + + if self.tags is None: + self.tags = _DSFID3() + else: + raise error("an ID3 tag already exists") + + @convert_error(IOError, error) + @loadfile() + def load(self, filething, **kwargs): + dsf_file = DSFFile(filething.fileobj) + + try: + self.tags = _DSFID3(filething.fileobj, **kwargs) + except ID3NoHeaderError: + self.tags = None + except ID3Error as e: + raise error(e) + else: + self.tags.filename = self.filename + + self.info = DSFInfo(dsf_file.fmt_chunk) + + @loadfile(writable=True) + def delete(self, filething): + self.tags = None + delete(filething) + + +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """Remove tags from a file. + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + dsf_file = DSFFile(filething.fileobj) + + if dsf_file.dsd_chunk.offset_metdata_chunk != 0: + id3_location = dsf_file.dsd_chunk.offset_metdata_chunk + dsf_file.dsd_chunk.offset_metdata_chunk = 0 + dsf_file.dsd_chunk.write() + + filething.fileobj.seek(id3_location) + filething.fileobj.truncate() + + +Open = DSF diff --git a/libs/mutagen/easyid3.py b/libs/mutagen/easyid3.py index f8dd2de0..71fb4e23 100644 --- a/libs/mutagen/easyid3.py +++ b/libs/mutagen/easyid3.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Easier access to ID3 tags. @@ -16,7 +16,7 @@ import mutagen.id3 from ._compat import iteritems, text_type, PY2 from mutagen import Metadata -from mutagen._util import DictMixin, dict_match +from mutagen._util import DictMixin, dict_match, loadfile from mutagen.id3 import ID3, error, delete, ID3FileType @@ -32,7 +32,9 @@ class EasyID3KeyError(KeyError, ValueError, error): class EasyID3(DictMixin, Metadata): - """A file with an ID3 tag. + """EasyID3(filething=None) + + A file with an ID3 tag. Like Vorbis comments, EasyID3 keys are case-insensitive ASCII strings. Only a subset of ID3 frames are supported by default. Use @@ -148,19 +150,14 @@ class EasyID3(DictMixin, Metadata): return list(id3[frameid]) def setter(id3, key, value): - try: - frame = id3[frameid] - except KeyError: - enc = 0 - # Store 8859-1 if we can, per MusicBrainz spec. - for v in value: - if v and max(v) > u'\x7f': - enc = 3 - break + enc = 0 + # Store 8859-1 if we can, per MusicBrainz spec. + for v in value: + if v and max(v) > u'\x7f': + enc = 3 + break - id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) - else: - frame.text = value + id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) def deleter(id3, key): del(id3[frameid]) @@ -175,10 +172,30 @@ class EasyID3(DictMixin, Metadata): load = property(lambda s: s.__id3.load, lambda s, v: setattr(s.__id3, 'load', v)) - def save(self, *args, **kwargs): - # ignore v2_version until we support 2.3 here - kwargs.pop("v2_version", None) - self.__id3.save(*args, **kwargs) + @loadfile(writable=True, create=True) + def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + """save(filething=None, v1=1, v2_version=4, v23_sep='/', padding=None) + + Save changes to a file. + See :meth:`mutagen.id3.ID3.save` for more info. 
+ """ + + if v2_version == 3: + # EasyID3 only works with v2.4 frames, so update_to_v23() would + # break things. We have to save a shallow copy of all tags + # and restore it after saving. Due to CHAP/CTOC copying has + # to be done recursively implemented in ID3Tags. + backup = self.__id3._copy() + try: + self.__id3.update_to_v23() + self.__id3.save( + filething, v1=v1, v2_version=v2_version, v23_sep=v23_sep, + padding=padding) + finally: + self.__id3._restore(backup) + else: + self.__id3.save(filething, v1=v1, v2_version=v2_version, + v23_sep=v23_sep, padding=padding) delete = property(lambda s: s.__id3.delete, lambda s, v: setattr(s.__id3, 'delete', v)) @@ -186,34 +203,32 @@ class EasyID3(DictMixin, Metadata): filename = property(lambda s: s.__id3.filename, lambda s, fn: setattr(s.__id3, 'filename', fn)) - size = property(lambda s: s.__id3.size, - lambda s, fn: setattr(s.__id3, 'size', s)) + @property + def size(self): + return self.__id3.size def __getitem__(self, key): - key = key.lower() - func = dict_match(self.Get, key, self.GetFallback) + func = dict_match(self.Get, key.lower(), self.GetFallback) if func is not None: return func(self.__id3, key) else: raise EasyID3KeyError("%r is not a valid key" % key) def __setitem__(self, key, value): - key = key.lower() if PY2: if isinstance(value, basestring): value = [value] else: if isinstance(value, text_type): value = [value] - func = dict_match(self.Set, key, self.SetFallback) + func = dict_match(self.Set, key.lower(), self.SetFallback) if func is not None: return func(self.__id3, key, value) else: raise EasyID3KeyError("%r is not a valid key" % key) def __delitem__(self, key): - key = key.lower() - func = dict_match(self.Delete, key, self.DeleteFallback) + func = dict_match(self.Delete, key.lower(), self.DeleteFallback) if func is not None: return func(self.__id3, key) else: @@ -469,7 +484,7 @@ for frameid, key in iteritems({ "TIT2": "title", "TIT3": "version", "TPE1": "artist", - "TPE2": "performer", + "TPE2": 
"albumartist", "TPE3": "conductor", "TPE4": "arranger", "TPOS": "discnumber", @@ -518,6 +533,7 @@ for desc, key in iteritems({ u"MusicBrainz Disc Id": "musicbrainz_discid", u"ASIN": "asin", u"ALBUMARTISTSORT": "albumartistsort", + u"PERFORMER": "performer", u"BARCODE": "barcode", u"CATALOGNUMBER": "catalognumber", u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid", @@ -530,5 +546,15 @@ for desc, key in iteritems({ class EasyID3FileType(ID3FileType): - """Like ID3FileType, but uses EasyID3 for tags.""" + """EasyID3FileType(filething=None) + + Like ID3FileType, but uses EasyID3 for tags. + + Arguments: + filething (filething) + + Attributes: + tags (`EasyID3`) + """ + ID3 = EasyID3 diff --git a/libs/mutagen/easymp4.py b/libs/mutagen/easymp4.py index 8ad7fd0e..ccc3a652 100644 --- a/libs/mutagen/easymp4.py +++ b/libs/mutagen/easymp4.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2009 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. from mutagen import Tags from mutagen._util import DictMixin, dict_match @@ -20,7 +20,9 @@ class EasyMP4KeyError(error, KeyError, ValueError): class EasyMP4Tags(DictMixin, Tags): - """A file with MPEG-4 iTunes metadata. + """EasyMP4Tags() + + A file with MPEG-4 iTunes metadata. 
Like Vorbis comments, EasyMP4Tags keys are case-insensitive ASCII strings, and values are a list of Unicode strings (and these lists @@ -40,11 +42,14 @@ class EasyMP4Tags(DictMixin, Tags): self.load = self.__mp4.load self.save = self.__mp4.save self.delete = self.__mp4.delete - self._padding = self.__mp4._padding filename = property(lambda s: s.__mp4.filename, lambda s, fn: setattr(s.__mp4, 'filename', fn)) + @property + def _padding(self): + return self.__mp4._padding + @classmethod def RegisterKey(cls, key, getter=None, setter=None, deleter=None, lister=None): @@ -268,11 +273,14 @@ for name, key in { class EasyMP4(MP4): - """Like :class:`MP4 <mutagen.mp4.MP4>`, - but uses :class:`EasyMP4Tags` for tags. + """EasyMP4(filelike) - :ivar info: :class:`MP4Info <mutagen.mp4.MP4Info>` - :ivar tags: :class:`EasyMP4Tags` + Like :class:`MP4 <mutagen.mp4.MP4>`, but uses :class:`EasyMP4Tags` for + tags. + + Attributes: + info (`mutagen.mp4.MP4Info`) + tags (`EasyMP4Tags`) """ MP4Tags = EasyMP4Tags diff --git a/libs/mutagen/flac.py b/libs/mutagen/flac.py index f3cc5ab5..fc351dd4 100644 --- a/libs/mutagen/flac.py +++ b/libs/mutagen/flac.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write FLAC Vorbis comments and stream information. 
@@ -27,13 +27,14 @@ from ._vorbis import VCommentDict import mutagen from ._compat import cBytesIO, endswith, chr_, xrange -from mutagen._util import resize_bytes, MutagenError, get_size +from mutagen._util import resize_bytes, MutagenError, get_size, loadfile, \ + convert_error from mutagen._tags import PaddingInfo -from mutagen.id3 import BitPaddedInt +from mutagen.id3._util import BitPaddedInt from functools import reduce -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -57,7 +58,8 @@ class StrictFileObject(object): def __init__(self, fileobj): self._fileobj = fileobj - for m in ["close", "tell", "seek", "write", "name"]: + for m in ["close", "tell", "seek", "write", "name", "flush", + "truncate"]: if hasattr(fileobj, m): setattr(self, m, getattr(fileobj, m)) @@ -79,8 +81,7 @@ class MetadataBlock(object): blocks, and also as a container for data blobs of unknown blocks. Attributes: - - * data -- raw binary data for this block + data (`bytes`): raw binary data for this block """ _distrust_size = False @@ -168,7 +169,9 @@ class MetadataBlock(object): class StreamInfo(MetadataBlock, mutagen.StreamInfo): - """FLAC stream information. + """StreamInfo() + + FLAC stream information. This contains information about the audio data in the FLAC file. Unlike most stream information objects in Mutagen, changes to this @@ -177,17 +180,18 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): attributes of this block. 
Attributes: - - * min_blocksize -- minimum audio block size - * max_blocksize -- maximum audio block size - * sample_rate -- audio sample rate in Hz - * channels -- audio channels (1 for mono, 2 for stereo) - * bits_per_sample -- bits per sample - * total_samples -- total samples in file - * length -- audio length in seconds + min_blocksize (`int`): minimum audio block size + max_blocksize (`int`): maximum audio block size + sample_rate (`int`): audio sample rate in Hz + channels (`int`): audio channels (1 for mono, 2 for stereo) + bits_per_sample (`int`): bits per sample + total_samples (`int`): total samples in file + length (`float`): audio length in seconds + bitrate (`int`): bitrate in bits per second, as an int """ code = 0 + bitrate = 0 def __eq__(self, other): try: @@ -197,7 +201,7 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): self.channels == other.channels and self.bits_per_sample == other.bits_per_sample and self.total_samples == other.total_samples) - except: + except Exception: return False __hash__ = MetadataBlock.__hash__ @@ -259,7 +263,9 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): class SeekPoint(tuple): - """A single seek point in a FLAC file. + """SeekPoint() + + A single seek point in a FLAC file. Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL, and byte_offset and num_samples undefined. Seek points must be @@ -269,10 +275,9 @@ class SeekPoint(tuple): may be any number of them. Attributes: - - * first_sample -- sample number of first sample in the target frame - * byte_offset -- offset from first frame to target frame - * num_samples -- number of samples in target frame + first_sample (`int`): sample number of first sample in the target frame + byte_offset (`int`): offset from first frame to target frame + num_samples (`int`): number of samples in target frame """ def __new__(cls, first_sample, byte_offset, num_samples): @@ -288,8 +293,7 @@ class SeekTable(MetadataBlock): """Read and write FLAC seek tables. 
Attributes: - - * seekpoints -- list of SeekPoint objects + seekpoints: list of SeekPoint objects """ __SEEKPOINT_FORMAT = '>QQH' @@ -332,7 +336,9 @@ class SeekTable(MetadataBlock): class VCFLACDict(VCommentDict): - """Read and write FLAC Vorbis comments. + """VCFLACDict() + + Read and write FLAC Vorbis comments. FLACs don't use the framing bit at the end of the comment block. So this extends VCommentDict to not use the framing bit. @@ -349,7 +355,9 @@ class VCFLACDict(VCommentDict): class CueSheetTrackIndex(tuple): - """Index for a track in a cuesheet. + """CueSheetTrackIndex(index_number, index_offset) + + Index for a track in a cuesheet. For CD-DA, an index_number of 0 corresponds to the track pre-gap. The first index in a track must have a number of 0 or 1, @@ -358,9 +366,8 @@ class CueSheetTrackIndex(tuple): divisible by 588 samples. Attributes: - - * index_number -- index point number - * index_offset -- offset in samples from track start + index_number (`int`): index point number + index_offset (`int`): offset in samples from track start """ def __new__(cls, index_number, index_offset): @@ -372,7 +379,9 @@ class CueSheetTrackIndex(tuple): class CueSheetTrack(object): - """A track in a cuesheet. + """CueSheetTrack() + + A track in a cuesheet. For CD-DA, track_numbers must be 1-99, or 170 for the lead-out. Track_numbers must be unique within a cue sheet. There @@ -380,13 +389,13 @@ class CueSheetTrack(object): which must have none. 
Attributes: - - * track_number -- track number - * start_offset -- track offset in samples from start of FLAC stream - * isrc -- ISRC code - * type -- 0 for audio, 1 for digital data - * pre_emphasis -- true if the track is recorded with pre-emphasis - * indexes -- list of CueSheetTrackIndex objects + track_number (`int`): track number + start_offset (`int`): track offset in samples from start of FLAC stream + isrc (`mutagen.text`): ISRC code, exactly 12 characters + type (`int`): 0 for audio, 1 for digital data + pre_emphasis (`bool`): true if the track is recorded with pre-emphasis + indexes (List[`mutagen.flac.CueSheetTrackIndex`]): + list of CueSheetTrackIndex objects """ def __init__(self, track_number, start_offset, isrc='', type_=0, @@ -419,19 +428,24 @@ class CueSheetTrack(object): class CueSheet(MetadataBlock): - """Read and write FLAC embedded cue sheets. + """CueSheet() + + Read and write FLAC embedded cue sheets. Number of tracks should be from 1 to 100. There should always be exactly one lead-out track and that track must be the last track in the cue sheet. Attributes: - - * media_catalog_number -- media catalog number in ASCII - * lead_in_samples -- number of lead-in samples - * compact_disc -- true if the cuesheet corresponds to a compact disc - * tracks -- list of CueSheetTrack objects - * lead_out -- lead-out as CueSheetTrack or None if lead-out was not found + media_catalog_number (`mutagen.text`): media catalog number in ASCII, + up to 128 characters + lead_in_samples (`int`): number of lead-in samples + compact_disc (`bool`): true if the cuesheet corresponds to a + compact disc + tracks (List[`mutagen.flac.CueSheetTrack`]): + list of CueSheetTrack objects + lead_out (`mutagen.flac.CueSheetTrack` or `None`): + lead-out as CueSheetTrack or None if lead-out was not found """ __CUESHEET_FORMAT = '>128sQB258xB' @@ -521,19 +535,23 @@ class CueSheet(MetadataBlock): class Picture(MetadataBlock): - """Read and write FLAC embed pictures. 
+ """Picture() + + Read and write FLAC embed pictures. + + .. currentmodule:: mutagen Attributes: - - * type -- picture type (same as types for ID3 APIC frames) - * mime -- MIME type of the picture - * desc -- picture's description - * width -- width in pixels - * height -- height in pixels - * depth -- color depth in bits-per-pixel - * colors -- number of colors for indexed palettes (like GIF), - 0 for non-indexed - * data -- picture data + type (`id3.PictureType`): picture type + (same as types for ID3 APIC frames) + mime (`text`): MIME type of the picture + desc (`text`): picture's description + width (`int`): width in pixels + height (`int`): height in pixels + depth (`int`): color depth in bits-per-pixel + colors (`int`): number of colors for indexed palettes (like GIF), + 0 for non-indexed + data (`bytes`): picture data To create a picture from file (in order to add to a FLAC file), instantiate this object without passing anything to the constructor and @@ -608,11 +626,16 @@ class Picture(MetadataBlock): class Padding(MetadataBlock): - """Empty padding space for metadata blocks. + """Padding() + + Empty padding space for metadata blocks. To avoid rewriting the entire FLAC file when editing comments, metadata is often padded. Padding should occur at the end, and no more than one padding block should be in any FLAC file. + + Attributes: + length (`int`): length """ code = 1 @@ -645,22 +668,25 @@ class Padding(MetadataBlock): class FLAC(mutagen.FileType): - """A FLAC audio file. + """FLAC(filething) + + A FLAC audio file. 
+ + Args: + filething (filething) Attributes: - - * cuesheet -- CueSheet object, if any - * seektable -- SeekTable object, if any - * pictures -- list of embedded pictures + cuesheet (`CueSheet`): if any or `None` + seektable (`SeekTable`): if any or `None` + pictures (List[`Picture`]): list of embedded pictures + info (`StreamInfo`) + tags (`mutagen._vorbis.VCommentDict`) """ _mimes = ["audio/flac", "audio/x-flac", "application/x-flac"] info = None - """A `StreamInfo`""" - tags = None - """A `VCommentDict`""" METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict, CueSheet, Picture] @@ -730,49 +756,63 @@ class FLAC(mutagen.FileType): add_vorbiscomment = add_tags - def delete(self, filename=None): + @loadfile(writable=True) + def delete(self, filething): """Remove Vorbis comments from a file. If no filename is given, the one most recently loaded is used. """ - if filename is None: - filename = self.filename if self.tags is not None: self.metadata_blocks.remove(self.tags) - self.save(padding=lambda x: 0) - self.metadata_blocks.append(self.tags) + try: + self.save(filething, padding=lambda x: 0) + finally: + self.metadata_blocks.append(self.tags) self.tags.clear() vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.") - def load(self, filename): + @convert_error(IOError, error) + @loadfile() + def load(self, filething): """Load file information from a filename.""" + fileobj = filething.fileobj + self.metadata_blocks = [] self.tags = None self.cuesheet = None self.seektable = None - self.filename = filename - fileobj = StrictFileObject(open(filename, "rb")) - try: - self.__check_header(fileobj) - while self.__read_metadata_block(fileobj): - pass - finally: - fileobj.close() + + fileobj = StrictFileObject(fileobj) + self.__check_header(fileobj, filething.name) + while self.__read_metadata_block(fileobj): + pass try: self.metadata_blocks[0].length except (AttributeError, IndexError): raise FLACNoHeaderError("Stream info block not found") + 
if self.info.length: + start = fileobj.tell() + fileobj.seek(0, 2) + self.info.bitrate = int( + float(fileobj.tell() - start) * 8 / self.info.length) + else: + self.info.bitrate = 0 + @property def info(self): return self.metadata_blocks[0] def add_picture(self, picture): - """Add a new picture to the file.""" + """Add a new picture to the file. + + Args: + picture (Picture) + """ self.metadata_blocks.append(picture) def clear_pictures(self): @@ -783,51 +823,58 @@ class FLAC(mutagen.FileType): @property def pictures(self): - """List of embedded pictures""" + """ + Returns: + List[`Picture`]: List of embedded pictures + """ return [b for b in self.metadata_blocks if b.code == Picture.code] - def save(self, filename=None, deleteid3=False, padding=None): + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, deleteid3=False, padding=None): """Save metadata blocks to a file. + Args: + filething (filething) + deleteid3 (bool): delete id3 tags while at it + padding (:obj:`mutagen.PaddingFunction`) + If no filename is given, the one most recently loaded is used. 
""" - if filename is None: - filename = self.filename + f = StrictFileObject(filething.fileobj) + header = self.__check_header(f, filething.name) + audio_offset = self.__find_audio_offset(f) + # "fLaC" and maybe ID3 + available = audio_offset - header - with open(filename, 'rb+') as f: - header = self.__check_header(f) - audio_offset = self.__find_audio_offset(f) - # "fLaC" and maybe ID3 - available = audio_offset - header + # Delete ID3v2 + if deleteid3 and header > 4: + available += header - 4 + header = 4 - # Delete ID3v2 - if deleteid3 and header > 4: - available += header - 4 - header = 4 + content_size = get_size(f) - audio_offset + assert content_size >= 0 + data = MetadataBlock._writeblocks( + self.metadata_blocks, available, content_size, padding) + data_size = len(data) - content_size = get_size(f) - audio_offset - assert content_size >= 0 - data = MetadataBlock._writeblocks( - self.metadata_blocks, available, content_size, padding) - data_size = len(data) + resize_bytes(filething.fileobj, available, data_size, header) + f.seek(header - 4) + f.write(b"fLaC") + f.write(data) - resize_bytes(f, available, data_size, header) - f.seek(header - 4) - f.write(b"fLaC") - f.write(data) - - # Delete ID3v1 - if deleteid3: - try: + # Delete ID3v1 + if deleteid3: + try: + f.seek(-128, 2) + except IOError: + pass + else: + if f.read(3) == b"TAG": f.seek(-128, 2) - except IOError: - pass - else: - if f.read(3) == b"TAG": - f.seek(-128, 2) - f.truncate() + f.truncate() def __find_audio_offset(self, fileobj): byte = 0x00 @@ -847,7 +894,7 @@ class FLAC(mutagen.FileType): fileobj.read(size) return fileobj.tell() - def __check_header(self, fileobj): + def __check_header(self, fileobj, name): """Returns the offset of the flac block start (skipping id3 tags if found). The passed fileobj will be advanced to that offset as well. 
@@ -864,13 +911,24 @@ class FLAC(mutagen.FileType): size = None if size is None: raise FLACNoHeaderError( - "%r is not a valid FLAC file" % fileobj.name) + "%r is not a valid FLAC file" % name) return size Open = FLAC -def delete(filename): - """Remove tags from a file.""" - FLAC(filename).delete() +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """Remove tags from a file. + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + f = FLAC(filething) + filething.fileobj.seek(0) + f.delete(filething) diff --git a/libs/mutagen/id3/__init__.py b/libs/mutagen/id3/__init__.py index 11bf54ed..9033c76c 100644 --- a/libs/mutagen/id3/__init__.py +++ b/libs/mutagen/id3/__init__.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # 2006 Lukas Lalinsky # 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """ID3v2 reading and writing. @@ -30,1064 +30,67 @@ Since this file's documentation is a little unwieldy, you are probably interested in the :class:`ID3` class to start with. 
""" -__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] - -import struct -import errno - -from struct import unpack, pack, error as StructError - -import mutagen -from mutagen._util import insert_bytes, delete_bytes, DictProxy, enum -from mutagen._tags import PaddingInfo -from .._compat import chr_, PY3 - -from ._util import * -from ._frames import * -from ._specs import * - - -@enum -class ID3v1SaveOptions(object): - - REMOVE = 0 - """ID3v1 tags will be removed""" - - UPDATE = 1 - """ID3v1 tags will be updated but not added""" - - CREATE = 2 - """ID3v1 tags will be created and/or updated""" - - -def _fullread(fileobj, size): - """Read a certain number of bytes from the source file. - - Raises ValueError on invalid size input or EOFError/IOError. - """ - - if size < 0: - raise ValueError('Requested bytes (%s) less than zero' % size) - data = fileobj.read(size) - if len(data) != size: - raise EOFError("Not enough data to read") - return data - - -class ID3Header(object): - - _V24 = (2, 4, 0) - _V23 = (2, 3, 0) - _V22 = (2, 2, 0) - _V11 = (1, 1) - - f_unsynch = property(lambda s: bool(s._flags & 0x80)) - f_extended = property(lambda s: bool(s._flags & 0x40)) - f_experimental = property(lambda s: bool(s._flags & 0x20)) - f_footer = property(lambda s: bool(s._flags & 0x10)) - - def __init__(self, fileobj=None): - """Raises ID3NoHeaderError, ID3UnsupportedVersionError or error""" - - if fileobj is None: - # for testing - self._flags = 0 - return - - fn = getattr(fileobj, "name", "<unknown>") - try: - data = _fullread(fileobj, 10) - except EOFError: - raise ID3NoHeaderError("%s: too small" % fn) - - id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data) - self._flags = flags - self.size = BitPaddedInt(size) + 10 - self.version = (2, vmaj, vrev) - - if id3 != b'ID3': - raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn) - - if vmaj not in [2, 3, 4]: - raise ID3UnsupportedVersionError("%r ID3v2.%d not supported" - % (fn, vmaj)) - - if not 
BitPaddedInt.has_valid_padding(size): - raise error("Header size not synchsafe") - - if (self.version >= self._V24) and (flags & 0x0f): - raise error( - "%r has invalid flags %#02x" % (fn, flags)) - elif (self._V23 <= self.version < self._V24) and (flags & 0x1f): - raise error( - "%r has invalid flags %#02x" % (fn, flags)) - - if self.f_extended: - try: - extsize_data = _fullread(fileobj, 4) - except EOFError: - raise error("%s: too small" % fn) - - if PY3: - frame_id = extsize_data.decode("ascii", "replace") - else: - frame_id = extsize_data - - if frame_id in Frames: - # Some tagger sets the extended header flag but - # doesn't write an extended header; in this case, the - # ID3 data follows immediately. Since no extended - # header is going to be long enough to actually match - # a frame, and if it's *not* a frame we're going to be - # completely lost anyway, this seems to be the most - # correct check. - # https://github.com/quodlibet/quodlibet/issues/126 - self._flags ^= 0x40 - extsize = 0 - fileobj.seek(-4, 1) - elif self.version >= self._V24: - # "Where the 'Extended header size' is the size of the whole - # extended header, stored as a 32 bit synchsafe integer." - extsize = BitPaddedInt(extsize_data) - 4 - if not BitPaddedInt.has_valid_padding(extsize_data): - raise error( - "Extended header size not synchsafe") - else: - # "Where the 'Extended header size', currently 6 or 10 bytes, - # excludes itself." - extsize = unpack('>L', extsize_data)[0] - - try: - self._extdata = _fullread(fileobj, extsize) - except EOFError: - raise error("%s: too small" % fn) - - -class ID3(DictProxy, mutagen.Metadata): - """A file with an ID3v2 tag. - - Attributes: - - * version -- ID3 tag version as a tuple - * unknown_frames -- raw frame data of any unknown frames found - * size -- the total size of the ID3 tag, including the header - """ - - __module__ = "mutagen.id3" - - PEDANTIC = True - """Deprecated. 
Doesn't have any effect""" - - filename = None - - def __init__(self, *args, **kwargs): - self.unknown_frames = [] - self.__unknown_version = None - self._header = None - self._version = (2, 4, 0) - super(ID3, self).__init__(*args, **kwargs) - - @property - def version(self): - """ID3 tag version as a tuple (of the loaded file)""" - - if self._header is not None: - return self._header.version - return self._version - - @version.setter - def version(self, value): - self._version = value - - @property - def f_unsynch(self): - if self._header is not None: - return self._header.f_unsynch - return False - - @property - def f_extended(self): - if self._header is not None: - return self._header.f_extended - return False - - @property - def size(self): - if self._header is not None: - return self._header.size - return 0 - - def _pre_load_header(self, fileobj): - # XXX: for aiff to adjust the offset.. - pass - - def load(self, filename, known_frames=None, translate=True, v2_version=4): - """Load tags from a filename. - - Keyword arguments: - - * filename -- filename to load tag data from - * known_frames -- dict mapping frame IDs to Frame objects - * translate -- Update all tags to ID3v2.3/4 internally. If you - intend to save, this must be true or you have to - call update_to_v23() / update_to_v24() manually. - * v2_version -- if update_to_v23 or update_to_v24 get called (3 or 4) - - Example of loading a custom frame:: - - my_frames = dict(mutagen.id3.Frames) - class XMYF(Frame): ... 
- my_frames["XMYF"] = XMYF - mutagen.id3.ID3(filename, known_frames=my_frames) - """ - - if v2_version not in (3, 4): - raise ValueError("Only 3 and 4 possible for v2_version") - - self.filename = filename - self.unknown_frames = [] - self.__known_frames = known_frames - self._header = None - self._padding = 0 # for testing - - with open(filename, 'rb') as fileobj: - self._pre_load_header(fileobj) - - try: - self._header = ID3Header(fileobj) - except (ID3NoHeaderError, ID3UnsupportedVersionError): - frames, offset = _find_id3v1(fileobj) - if frames is None: - raise - - self.version = ID3Header._V11 - for v in frames.values(): - self.add(v) - else: - frames = self.__known_frames - if frames is None: - if self.version >= ID3Header._V23: - frames = Frames - elif self.version >= ID3Header._V22: - frames = Frames_2_2 - - try: - data = _fullread(fileobj, self.size - 10) - except (ValueError, EOFError, IOError) as e: - raise error(e) - - for frame in self.__read_frames(data, frames=frames): - if isinstance(frame, Frame): - self.add(frame) - else: - self.unknown_frames.append(frame) - self.__unknown_version = self.version[:2] - - if translate: - if v2_version == 3: - self.update_to_v23() - else: - self.update_to_v24() - - def getall(self, key): - """Return all frames with a given name (the list may be empty). - - This is best explained by examples:: - - id3.getall('TIT2') == [id3['TIT2']] - id3.getall('TTTT') == [] - id3.getall('TXXX') == [TXXX(desc='woo', text='bar'), - TXXX(desc='baz', text='quuuux'), ...] - - Since this is based on the frame's HashKey, which is - colon-separated, you can use it to do things like - ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``. 
- """ - if key in self: - return [self[key]] - else: - key = key + ":" - return [v for s, v in self.items() if s.startswith(key)] - - def delall(self, key): - """Delete all tags of a given kind; see getall.""" - if key in self: - del(self[key]) - else: - key = key + ":" - for k in list(self.keys()): - if k.startswith(key): - del(self[k]) - - def setall(self, key, values): - """Delete frames of the given type and add frames in 'values'.""" - self.delall(key) - for tag in values: - self[tag.HashKey] = tag - - def pprint(self): - """Return tags in a human-readable format. - - "Human-readable" is used loosely here. The format is intended - to mirror that used for Vorbis or APEv2 output, e.g. - - ``TIT2=My Title`` - - However, ID3 frames can have multiple keys: - - ``POPM=user@example.org=3 128/255`` - """ - frames = sorted(Frame.pprint(s) for s in self.values()) - return "\n".join(frames) - - def loaded_frame(self, tag): - """Deprecated; use the add method.""" - # turn 2.2 into 2.3/2.4 tags - if len(type(tag).__name__) == 3: - tag = type(tag).__base__(tag) - self[tag.HashKey] = tag - - # add = loaded_frame (and vice versa) break applications that - # expect to be able to override loaded_frame (e.g. Quod Libet), - # as does making loaded_frame call add. 
- def add(self, frame): - """Add a frame to the tag.""" - return self.loaded_frame(frame) - - def __setitem__(self, key, tag): - if not isinstance(tag, Frame): - raise TypeError("%r not a Frame instance" % tag) - super(ID3, self).__setitem__(key, tag) - - def __read_frames(self, data, frames): - assert self.version >= ID3Header._V22 - - if self.version < ID3Header._V24 and self.f_unsynch: - try: - data = unsynch.decode(data) - except ValueError: - pass - - if self.version >= ID3Header._V23: - if self.version < ID3Header._V24: - bpi = int - else: - bpi = _determine_bpi(data, frames) - - while data: - header = data[:10] - try: - name, size, flags = unpack('>4sLH', header) - except struct.error: - return # not enough header - if name.strip(b'\x00') == b'': - return - - size = bpi(size) - framedata = data[10:10 + size] - data = data[10 + size:] - self._padding = len(data) - if size == 0: - continue # drop empty frames - - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue - - try: - # someone writes 2.3 frames with 2.2 names - if name[-1] == "\x00": - tag = Frames_2_2[name[:-1]] - name = tag.__base__.__name__ - - tag = frames[name] - except KeyError: - if is_valid_frame_id(name): - yield header + framedata - else: - try: - yield tag._fromData(self._header, flags, framedata) - except NotImplementedError: - yield header + framedata - except ID3JunkFrameError: - pass - elif self.version >= ID3Header._V22: - while data: - header = data[0:6] - try: - name, size = unpack('>3s3s', header) - except struct.error: - return # not enough header - size, = struct.unpack('>L', b'\x00' + size) - if name.strip(b'\x00') == b'': - return - - framedata = data[6:6 + size] - data = data[6 + size:] - self._padding = len(data) - if size == 0: - continue # drop empty frames - - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue - - try: - tag = frames[name] - except KeyError: - if is_valid_frame_id(name): - yield header + 
framedata - else: - try: - yield tag._fromData(self._header, 0, framedata) - except (ID3EncryptionUnsupportedError, - NotImplementedError): - yield header + framedata - except ID3JunkFrameError: - pass - - def _prepare_data(self, fileobj, start, available, v2_version, v23_sep, - pad_func): - if v2_version == 3: - version = ID3Header._V23 - elif v2_version == 4: - version = ID3Header._V24 - else: - raise ValueError("Only 3 or 4 allowed for v2_version") - - # Sort frames by 'importance' - order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] - order = dict((b, a) for a, b in enumerate(order)) - last = len(order) - frames = sorted(self.items(), - key=lambda a: (order.get(a[0][:4], last), a[0])) - - framedata = [self.__save_frame(frame, version=version, v23_sep=v23_sep) - for (key, frame) in frames] - - # only write unknown frames if they were loaded from the version - # we are saving with or upgraded to it - if self.__unknown_version == version[:2]: - framedata.extend(data for data in self.unknown_frames - if len(data) > 10) - - needed = sum(map(len, framedata)) + 10 - - fileobj.seek(0, 2) - trailing_size = fileobj.tell() - start - - info = PaddingInfo(available - needed, trailing_size) - new_padding = info._get_padding(pad_func) - if new_padding < 0: - raise error("invalid padding") - new_size = needed + new_padding - - new_framesize = BitPaddedInt.to_str(new_size - 10, width=4) - header = pack('>3sBBB4s', b'ID3', v2_version, 0, 0, new_framesize) - - data = bytearray(header) - for frame in framedata: - data += frame - assert new_size >= len(data) - data += (new_size - len(data)) * b'\x00' - assert new_size == len(data) - - return data - - def save(self, filename=None, v1=1, v2_version=4, v23_sep='/', - padding=None): - """Save changes to a file. - - Args: - filename: - Filename to save the tag to. If no filename is given, - the one most recently loaded is used. - v1 (ID3v1SaveOptions): - if 0, ID3v1 tags will be removed. 
- if 1, ID3v1 tags will be updated but not added. - if 2, ID3v1 tags will be created and/or updated - v2 (int): - version of ID3v2 tags (3 or 4). - v23_sep (str): - the separator used to join multiple text values - if v2_version == 3. Defaults to '/' but if it's None - will be the ID3v2v2.4 null separator. - padding (function): - A function taking a PaddingInfo which should - return the amount of padding to use. If None (default) - will default to something reasonable. - - By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3 - tags, you must call method update_to_v23 before saving the file. - - The lack of a way to update only an ID3v1 tag is intentional. - - Can raise id3.error. - """ - - if filename is None: - filename = self.filename - - try: - f = open(filename, 'rb+') - except IOError as err: - from errno import ENOENT - if err.errno != ENOENT: - raise - f = open(filename, 'ab') # create, then reopen - f = open(filename, 'rb+') - - try: - try: - header = ID3Header(f) - except ID3NoHeaderError: - old_size = 0 - else: - old_size = header.size - - data = self._prepare_data( - f, 0, old_size, v2_version, v23_sep, padding) - new_size = len(data) - - if (old_size < new_size): - insert_bytes(f, new_size - old_size, old_size) - elif (old_size > new_size): - delete_bytes(f, old_size - new_size, new_size) - f.seek(0) - f.write(data) - - self.__save_v1(f, v1) - - finally: - f.close() - - def __save_v1(self, f, v1): - tag, offset = _find_id3v1(f) - has_v1 = tag is not None - - f.seek(offset, 2) - if v1 == ID3v1SaveOptions.UPDATE and has_v1 or \ - v1 == ID3v1SaveOptions.CREATE: - f.write(MakeID3v1(self)) - else: - f.truncate() - - def delete(self, filename=None, delete_v1=True, delete_v2=True): - """Remove tags from a file. - - If no filename is given, the one most recently loaded is used. 
- - Keyword arguments: - - * delete_v1 -- delete any ID3v1 tag - * delete_v2 -- delete any ID3v2 tag - """ - if filename is None: - filename = self.filename - delete(filename, delete_v1, delete_v2) - self.clear() - - def __save_frame(self, frame, name=None, version=ID3Header._V24, - v23_sep=None): - flags = 0 - if isinstance(frame, TextFrame): - if len(str(frame)) == 0: - return b'' - - if version == ID3Header._V23: - framev23 = frame._get_v23_frame(sep=v23_sep) - framedata = framev23._writeData() - else: - framedata = frame._writeData() - - usize = len(framedata) - if usize > 2048: - # Disabled as this causes iTunes and other programs - # to fail to find these frames, which usually includes - # e.g. APIC. - # framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') - # flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN - pass - - if version == ID3Header._V24: - bits = 7 - elif version == ID3Header._V23: - bits = 8 - else: - raise ValueError - - datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits) - - if name is not None: - assert isinstance(name, bytes) - frame_name = name - else: - frame_name = type(frame).__name__ - if PY3: - frame_name = frame_name.encode("ascii") - - header = pack('>4s4sH', frame_name, datasize, flags) - return header + framedata - - def __update_common(self): - """Updates done by both v23 and v24 update""" - - if "TCON" in self: - # Get rid of "(xx)Foobr" format. - self["TCON"].genres = self["TCON"].genres - - mimes = {"PNG": "image/png", "JPG": "image/jpeg"} - for pic in self.getall("APIC"): - if pic.mime in mimes: - newpic = APIC( - encoding=pic.encoding, mime=mimes[pic.mime], - type=pic.type, desc=pic.desc, data=pic.data) - self.add(newpic) - - def update_to_v24(self): - """Convert older tags into an ID3v2.4 tag. - - This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to - TDRC). If you intend to save tags, you must call this function - at some point; it is called by default when loading the tag. 
- """ - - self.__update_common() - - if self.__unknown_version == (2, 3): - # convert unknown 2.3 frames (flags/size) to 2.4 - converted = [] - for frame in self.unknown_frames: - try: - name, size, flags = unpack('>4sLH', frame[:10]) - except struct.error: - continue - - try: - frame = BinaryFrame._fromData( - self._header, flags, frame[10:]) - except (error, NotImplementedError): - continue - - converted.append(self.__save_frame(frame, name=name)) - self.unknown_frames[:] = converted - self.__unknown_version = (2, 4) - - # TDAT, TYER, and TIME have been turned into TDRC. - try: - date = text_type(self.get("TYER", "")) - if date.strip(u"\x00"): - self.pop("TYER") - dat = text_type(self.get("TDAT", "")) - if dat.strip("\x00"): - self.pop("TDAT") - date = "%s-%s-%s" % (date, dat[2:], dat[:2]) - time = text_type(self.get("TIME", "")) - if time.strip("\x00"): - self.pop("TIME") - date += "T%s:%s:00" % (time[:2], time[2:]) - if "TDRC" not in self: - self.add(TDRC(encoding=0, text=date)) - except UnicodeDecodeError: - # Old ID3 tags have *lots* of Unicode problems, so if TYER - # is bad, just chuck the frames. - pass - - # TORY can be the first part of a TDOR. - if "TORY" in self: - f = self.pop("TORY") - if "TDOR" not in self: - try: - self.add(TDOR(encoding=0, text=str(f))) - except UnicodeDecodeError: - pass - - # IPLS is now TIPL. - if "IPLS" in self: - f = self.pop("IPLS") - if "TIPL" not in self: - self.add(TIPL(encoding=f.encoding, people=f.people)) - - # These can't be trivially translated to any ID3v2.4 tags, or - # should have been removed already. - for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]: - if key in self: - del(self[key]) - - def update_to_v23(self): - """Convert older (and newer) tags into an ID3v2.3 tag. - - This updates incompatible ID3v2 frames to ID3v2.3 ones. If you - intend to save tags as ID3v2.3, you must call this function - at some point. 
- - If you want to to go off spec and include some v2.4 frames - in v2.3, remove them before calling this and add them back afterwards. - """ - - self.__update_common() - - # we could downgrade unknown v2.4 frames here, but given that - # the main reason to save v2.3 is compatibility and this - # might increase the chance of some parser breaking.. better not - - # TMCL, TIPL -> TIPL - if "TIPL" in self or "TMCL" in self: - people = [] - if "TIPL" in self: - f = self.pop("TIPL") - people.extend(f.people) - if "TMCL" in self: - f = self.pop("TMCL") - people.extend(f.people) - if "IPLS" not in self: - self.add(IPLS(encoding=f.encoding, people=people)) - - # TDOR -> TORY - if "TDOR" in self: - f = self.pop("TDOR") - if f.text: - d = f.text[0] - if d.year and "TORY" not in self: - self.add(TORY(encoding=f.encoding, text="%04d" % d.year)) - - # TDRC -> TYER, TDAT, TIME - if "TDRC" in self: - f = self.pop("TDRC") - if f.text: - d = f.text[0] - if d.year and "TYER" not in self: - self.add(TYER(encoding=f.encoding, text="%04d" % d.year)) - if d.month and d.day and "TDAT" not in self: - self.add(TDAT(encoding=f.encoding, - text="%02d%02d" % (d.day, d.month))) - if d.hour and d.minute and "TIME" not in self: - self.add(TIME(encoding=f.encoding, - text="%02d%02d" % (d.hour, d.minute))) - - # New frames added in v2.4 - v24_frames = [ - 'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR', - 'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO', - 'TSOA', 'TSOP', 'TSOT', 'TSST', - ] - - for key in v24_frames: - if key in self: - del(self[key]) - - -def delete(filename, delete_v1=True, delete_v2=True): - """Remove tags from a file. 
- - Keyword arguments: - - * delete_v1 -- delete any ID3v1 tag - * delete_v2 -- delete any ID3v2 tag - """ - - with open(filename, 'rb+') as f: - - if delete_v1: - tag, offset = _find_id3v1(f) - if tag is not None: - f.seek(offset, 2) - f.truncate() - - # technically an insize=0 tag is invalid, but we delete it anyway - # (primarily because we used to write it) - if delete_v2: - f.seek(0, 0) - idata = f.read(10) - try: - id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) - except struct.error: - id3, insize = b'', -1 - insize = BitPaddedInt(insize) - if id3 == b'ID3' and insize >= 0: - delete_bytes(f, insize + 10, 0) - +from ._file import ID3, ID3FileType, delete, ID3v1SaveOptions +from ._specs import Encoding, PictureType, CTOCFlags, ID3TimeStamp +from ._frames import Frames, Frames_2_2, Frame, TextFrame, UrlFrame, \ + UrlFrameU, TimeStampTextFrame, BinaryFrame, NumericPartTextFrame, \ + NumericTextFrame, PairedTextFrame +from ._util import ID3NoHeaderError, error, ID3UnsupportedVersionError +from ._id3v1 import ParseID3v1, MakeID3v1 +from ._tags import ID3Tags +from ._frames import (AENC, APIC, ASPI, BUF, CHAP, CNT, COM, COMM, COMR, CRA, + CRM, CTOC, ENCR, EQU2, ETC, ETCO, GEO, GEOB, GP1, GRID, GRP1, IPL, IPLS, + LINK, LNK, MCDI, MCI, MLL, MLLT, MVI, MVIN, MVN, MVNM, OWNE, PCNT, PCST, + PIC, POP, POPM, POSS, PRIV, RBUF, REV, RVA, RVA2, RVAD, RVRB, SEEK, SIGN, + SLT, STC, SYLT, SYTC, TAL, TALB, TBP, TBPM, TCAT, TCM, TCMP, TCO, TCOM, + TCON, TCOP, TCP, TCR, TDA, TDAT, TDEN, TDES, TDLY, TDOR, TDRC, TDRL, TDTG, + TDY, TEN, TENC, TEXT, TFLT, TFT, TGID, TIM, TIME, TIPL, TIT1, TIT2, TIT3, + TKE, TKEY, TKWD, TLA, TLAN, TLE, TLEN, TMCL, TMED, TMOO, TMT, TOA, TOAL, + TOF, TOFN, TOL, TOLY, TOPE, TOR, TORY, TOT, TOWN, TP1, TP2, TP3, TP4, TPA, + TPB, TPE1, TPE2, TPE3, TPE4, TPOS, TPRO, TPUB, TRC, TRCK, TRD, TRDA, TRK, + TRSN, TRSO, TS2, TSA, TSC, TSI, TSIZ, TSO2, TSOA, TSOC, TSOP, TSOT, TSP, + TSRC, TSS, TSSE, TSST, TST, TT1, TT2, TT3, TXT, TXX, TXXX, TYE, TYER, UFI, 
+ UFID, ULT, USER, USLT, WAF, WAR, WAS, WCM, WCOM, WCOP, WCP, WFED, WOAF, + WOAR, WOAS, WORS, WPAY, WPB, WPUB, WXX, WXXX) + +# deprecated +from ._util import ID3EncryptionUnsupportedError, ID3JunkFrameError, \ + ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, ID3Warning, \ + BitPaddedInt as _BitPaddedIntForPicard # support open(filename) as interface Open = ID3 +# pyflakes +ID3, ID3FileType, delete, ID3v1SaveOptions, Encoding, PictureType, CTOCFlags, +ID3TimeStamp, Frames, Frames_2_2, Frame, TextFrame, UrlFrame, UrlFrameU, +TimeStampTextFrame, BinaryFrame, NumericPartTextFrame, NumericTextFrame, +PairedTextFrame, ID3NoHeaderError, error, ID3UnsupportedVersionError, +ParseID3v1, MakeID3v1, ID3Tags, ID3EncryptionUnsupportedError, +ID3JunkFrameError, ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, +ID3Warning -def _determine_bpi(data, frames, EMPTY=b"\x00" * 10): - """Takes id3v2.4 frame data and determines if ints or bitpaddedints - should be used for parsing. Needed because iTunes used to write - normal ints for frame sizes. 
- """ - - # count number of tags found as BitPaddedInt and how far past - o = 0 - asbpi = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - bpioff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - size = BitPaddedInt(size) - o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue - if name in frames: - asbpi += 1 - else: - bpioff = o - len(data) - - # count number of tags found as int and how far past - o = 0 - asint = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - intoff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue - if name in frames: - asint += 1 - else: - intoff = o - len(data) - - # if more tags as int, or equal and bpi is past and int is not - if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): - return int - return BitPaddedInt +AENC, APIC, ASPI, BUF, CHAP, CNT, COM, COMM, COMR, CRA, CRM, CTOC, ENCR, EQU2, +ETC, ETCO, GEO, GEOB, GP1, GRID, GRP1, IPL, IPLS, LINK, LNK, MCDI, MCI, MLL, +MLLT, MVI, MVIN, MVN, MVNM, OWNE, PCNT, PCST, PIC, POP, POPM, POSS, PRIV, +RBUF, REV, RVA, RVA2, RVAD, RVRB, SEEK, SIGN, SLT, STC, SYLT, SYTC, TAL, TALB, +TBP, TBPM, TCAT, TCM, TCMP, TCO, TCOM, TCON, TCOP, TCP, TCR, TDA, TDAT, TDEN, +TDES, TDLY, TDOR, TDRC, TDRL, TDTG, TDY, TEN, TENC, TEXT, TFLT, TFT, TGID, +TIM, TIME, TIPL, TIT1, TIT2, TIT3, TKE, TKEY, TKWD, TLA, TLAN, TLE, TLEN, +TMCL, TMED, TMOO, TMT, TOA, TOAL, TOF, TOFN, TOL, TOLY, TOPE, TOR, TORY, TOT, +TOWN, TP1, TP2, TP3, TP4, TPA, TPB, TPE1, TPE2, TPE3, TPE4, TPOS, TPRO, TPUB, +TRC, TRCK, TRD, TRDA, TRK, TRSN, TRSO, TS2, TSA, TSC, TSI, TSIZ, TSO2, TSOA, +TSOC, TSOP, TSOT, TSP, TSRC, TSS, TSSE, TSST, TST, TT1, TT2, TT3, TXT, TXX, +TXXX, TYE, TYER, UFI, UFID, ULT, USER, USLT, WAF, WAR, WAS, WCM, WCOM, WCOP, +WCP, WFED, WOAF, WOAR, WOAS, 
WORS, WPAY, WPB, WPUB, WXX, WXXX -def _find_id3v1(fileobj): - """Returns a tuple of (id3tag, offset_to_end) or (None, 0) +# Workaround for http://tickets.musicbrainz.org/browse/PICARD-833 +class _DummySpecForPicard(object): + write = None - offset mainly because we used to write too short tags in some cases and - we need the offset to delete them. - """ - - # id3v1 is always at the end (after apev2) - - extra_read = b"APETAGEX".index(b"TAG") - - try: - fileobj.seek(-128 - extra_read, 2) - except IOError as e: - if e.errno == errno.EINVAL: - # If the file is too small, might be ok since we wrote too small - # tags at some point. let's see how the parsing goes.. - fileobj.seek(0, 0) - else: - raise - - data = fileobj.read(128 + extra_read) - try: - idx = data.index(b"TAG") - except ValueError: - return (None, 0) - else: - # FIXME: make use of the apev2 parser here - # if TAG is part of APETAGEX assume this is an APEv2 tag - try: - ape_idx = data.index(b"APETAGEX") - except ValueError: - pass - else: - if idx == ape_idx + extra_read: - return (None, 0) - - tag = ParseID3v1(data[idx:]) - if tag is None: - return (None, 0) - - offset = idx - len(data) - return (tag, offset) +EncodedTextSpec = MultiSpec = _DummySpecForPicard +BitPaddedInt = _BitPaddedIntForPicard -# ID3v1.1 support. -def ParseID3v1(data): - """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. - - Returns a {frame_name: frame} dict or None. - """ - - try: - data = data[data.index(b"TAG"):] - except ValueError: - return None - if 128 < len(data) or len(data) < 124: - return None - - # Issue #69 - Previous versions of Mutagen, when encountering - # out-of-spec TDRC and TYER frames of less than four characters, - # wrote only the characters available - e.g. "1" or "" - into the - # year field. To parse those, reduce the size of the year field. - # Amazingly, "0s" works as a struct format string. 
- unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124) - - try: - tag, title, artist, album, year, comment, track, genre = unpack( - unpack_fmt, data) - except StructError: - return None - - if tag != b"TAG": - return None - - def fix(data): - return data.split(b"\x00")[0].strip().decode('latin1') - - title, artist, album, year, comment = map( - fix, [title, artist, album, year, comment]) - - frames = {} - if title: - frames["TIT2"] = TIT2(encoding=0, text=title) - if artist: - frames["TPE1"] = TPE1(encoding=0, text=[artist]) - if album: - frames["TALB"] = TALB(encoding=0, text=album) - if year: - frames["TDRC"] = TDRC(encoding=0, text=year) - if comment: - frames["COMM"] = COMM( - encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) - # Don't read a track number if it looks like the comment was - # padded with spaces instead of nulls (thanks, WinAmp). - if track and ((track != 32) or (data[-3] == b'\x00'[0])): - frames["TRCK"] = TRCK(encoding=0, text=str(track)) - if genre != 255: - frames["TCON"] = TCON(encoding=0, text=str(genre)) - return frames - - -def MakeID3v1(id3): - """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.""" - - v1 = {} - - for v2id, name in {"TIT2": "title", "TPE1": "artist", - "TALB": "album"}.items(): - if v2id in id3: - text = id3[v2id].text[0].encode('latin1', 'replace')[:30] - else: - text = b"" - v1[name] = text + (b"\x00" * (30 - len(text))) - - if "COMM" in id3: - cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28] - else: - cmnt = b"" - v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt))) - - if "TRCK" in id3: - try: - v1["track"] = chr_(+id3["TRCK"]) - except ValueError: - v1["track"] = b"\x00" - else: - v1["track"] = b"\x00" - - if "TCON" in id3: - try: - genre = id3["TCON"].genres[0] - except IndexError: - pass - else: - if genre in TCON.GENRES: - v1["genre"] = chr_(TCON.GENRES.index(genre)) - if "genre" not in v1: - v1["genre"] = b"\xff" - - if "TDRC" in id3: - year = 
text_type(id3["TDRC"]).encode('ascii') - elif "TYER" in id3: - year = text_type(id3["TYER"]).encode('ascii') - else: - year = b"" - v1["year"] = (year + b"\x00\x00\x00\x00")[:4] - - return ( - b"TAG" + - v1["title"] + - v1["artist"] + - v1["album"] + - v1["year"] + - v1["comment"] + - v1["track"] + - v1["genre"] - ) - - -class ID3FileType(mutagen.FileType): - """An unknown type of file with ID3 tags.""" - - ID3 = ID3 - - class _Info(mutagen.StreamInfo): - length = 0 - - def __init__(self, fileobj, offset): - pass - - @staticmethod - def pprint(): - return "Unknown format with ID3 tag" - - @staticmethod - def score(filename, fileobj, header_data): - return header_data.startswith(b"ID3") - - def add_tags(self, ID3=None): - """Add an empty ID3 tag to the file. - - A custom tag reader may be used in instead of the default - mutagen.id3.ID3 object, e.g. an EasyID3 reader. - """ - if ID3 is None: - ID3 = self.ID3 - if self.tags is None: - self.ID3 = ID3 - self.tags = ID3() - else: - raise error("an ID3 tag already exists") - - def load(self, filename, ID3=None, **kwargs): - """Load stream and tag information from a file. - - A custom tag reader may be used in instead of the default - mutagen.id3.ID3 object, e.g. an EasyID3 reader. - """ - - if ID3 is None: - ID3 = self.ID3 - else: - # If this was initialized with EasyID3, remember that for - # when tags are auto-instantiated in add_tags. 
- self.ID3 = ID3 - self.filename = filename - try: - self.tags = ID3(filename, **kwargs) - except ID3NoHeaderError: - self.tags = None - - if self.tags is not None: - try: - offset = self.tags.size - except AttributeError: - offset = None - else: - offset = None - - with open(filename, "rb") as fileobj: - self.info = self._Info(fileobj, offset) +__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] diff --git a/libs/mutagen/id3/_file.py b/libs/mutagen/id3/_file.py new file mode 100644 index 00000000..cb8794fc --- /dev/null +++ b/libs/mutagen/id3/_file.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2005 Michael Urman +# 2006 Lukas Lalinsky +# 2013 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import struct + +import mutagen +from mutagen._util import insert_bytes, delete_bytes, enum, \ + loadfile, convert_error, read_full +from mutagen._tags import PaddingInfo + +from ._util import error, ID3NoHeaderError, ID3UnsupportedVersionError, \ + BitPaddedInt +from ._tags import ID3Tags, ID3Header, ID3SaveConfig +from ._id3v1 import MakeID3v1, find_id3v1 + + +@enum +class ID3v1SaveOptions(object): + + REMOVE = 0 + """ID3v1 tags will be removed""" + + UPDATE = 1 + """ID3v1 tags will be updated but not added""" + + CREATE = 2 + """ID3v1 tags will be created and/or updated""" + + +class ID3(ID3Tags, mutagen.Metadata): + """ID3(filething=None) + + A file with an ID3v2 tag. + + If any arguments are given, the :meth:`load` is called with them. If no + arguments are given then an empty `ID3` object is created. 
+ + :: + + ID3("foo.mp3") + # same as + t = ID3() + t.load("foo.mp3") + + Arguments: + filething (filething): or `None` + + Attributes: + version (Tuple[int]): ID3 tag version as a tuple + unknown_frames (List[bytes]): raw frame data of any unknown frames + found + size (int): the total size of the ID3 tag, including the header + """ + + __module__ = "mutagen.id3" + + PEDANTIC = True + """`bool`: + + .. deprecated:: 1.28 + + Doesn't have any effect + """ + + filename = None + + def __init__(self, *args, **kwargs): + self._header = None + self._version = (2, 4, 0) + super(ID3, self).__init__(*args, **kwargs) + + @property + def version(self): + """`tuple`: ID3 tag version as a tuple (of the loaded file)""" + + if self._header is not None: + return self._header.version + return self._version + + @version.setter + def version(self, value): + self._version = value + + @property + def f_unsynch(self): + if self._header is not None: + return self._header.f_unsynch + return False + + @property + def f_extended(self): + if self._header is not None: + return self._header.f_extended + return False + + @property + def size(self): + if self._header is not None: + return self._header.size + return 0 + + def _pre_load_header(self, fileobj): + # XXX: for aiff to adjust the offset.. + pass + + @convert_error(IOError, error) + @loadfile() + def load(self, filething, known_frames=None, translate=True, v2_version=4): + """load(filething, known_frames=None, translate=True, v2_version=4) + + Load tags from a filename. + + Args: + filename (filething): filename or file object to load tag data from + known_frames (Dict[`mutagen.text`, `Frame`]): dict mapping frame + IDs to Frame objects + translate (bool): Update all tags to ID3v2.3/4 internally. If you + intend to save, this must be true or you have to + call update_to_v23() / update_to_v24() manually. 
+ v2_version (int): if update_to_v23 or update_to_v24 get called + (3 or 4) + + Example of loading a custom frame:: + + my_frames = dict(mutagen.id3.Frames) + class XMYF(Frame): ... + my_frames["XMYF"] = XMYF + mutagen.id3.ID3(filename, known_frames=my_frames) + """ + + fileobj = filething.fileobj + + if v2_version not in (3, 4): + raise ValueError("Only 3 and 4 possible for v2_version") + + self.unknown_frames = [] + self._header = None + self._padding = 0 + + self._pre_load_header(fileobj) + + try: + self._header = ID3Header(fileobj) + except (ID3NoHeaderError, ID3UnsupportedVersionError): + frames, offset = find_id3v1(fileobj) + if frames is None: + raise + + self.version = ID3Header._V11 + for v in frames.values(): + self.add(v) + else: + # XXX: attach to the header object so we have it in spec parsing.. + if known_frames is not None: + self._header._known_frames = known_frames + + data = read_full(fileobj, self.size - 10) + remaining_data = self._read(self._header, data) + self._padding = len(remaining_data) + + if translate: + if v2_version == 3: + self.update_to_v23() + else: + self.update_to_v24() + + def _prepare_data(self, fileobj, start, available, v2_version, v23_sep, + pad_func): + + if v2_version not in (3, 4): + raise ValueError("Only 3 or 4 allowed for v2_version") + + config = ID3SaveConfig(v2_version, v23_sep) + framedata = self._write(config) + + needed = len(framedata) + 10 + + fileobj.seek(0, 2) + trailing_size = fileobj.tell() - start + + info = PaddingInfo(available - needed, trailing_size) + new_padding = info._get_padding(pad_func) + if new_padding < 0: + raise error("invalid padding") + new_size = needed + new_padding + + new_framesize = BitPaddedInt.to_str(new_size - 10, width=4) + header = struct.pack( + '>3sBBB4s', b'ID3', v2_version, 0, 0, new_framesize) + + data = header + framedata + assert new_size >= len(data) + data += (new_size - len(data)) * b'\x00' + assert new_size == len(data) + + return data + + @convert_error(IOError, 
error) + @loadfile(writable=True, create=True) + def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + """save(filething=None, v1=1, v2_version=4, v23_sep='/', padding=None) + + Save changes to a file. + + Args: + filename (fspath): + Filename to save the tag to. If no filename is given, + the one most recently loaded is used. + v1 (ID3v1SaveOptions): + if 0, ID3v1 tags will be removed. + if 1, ID3v1 tags will be updated but not added. + if 2, ID3v1 tags will be created and/or updated + v2 (int): + version of ID3v2 tags (3 or 4). + v23_sep (text): + the separator used to join multiple text values + if v2_version == 3. Defaults to '/' but if it's None + will be the ID3v2v2.4 null separator. + padding (:obj:`mutagen.PaddingFunction`) + + Raises: + mutagen.MutagenError + + By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3 + tags, you must call method update_to_v23 before saving the file. + + The lack of a way to update only an ID3v1 tag is intentional. + """ + + f = filething.fileobj + + try: + header = ID3Header(filething.fileobj) + except ID3NoHeaderError: + old_size = 0 + else: + old_size = header.size + + data = self._prepare_data( + f, 0, old_size, v2_version, v23_sep, padding) + new_size = len(data) + + if (old_size < new_size): + insert_bytes(f, new_size - old_size, old_size) + elif (old_size > new_size): + delete_bytes(f, old_size - new_size, new_size) + f.seek(0) + f.write(data) + + self.__save_v1(f, v1) + + def __save_v1(self, f, v1): + tag, offset = find_id3v1(f) + has_v1 = tag is not None + + f.seek(offset, 2) + if v1 == ID3v1SaveOptions.UPDATE and has_v1 or \ + v1 == ID3v1SaveOptions.CREATE: + f.write(MakeID3v1(self)) + else: + f.truncate() + + @loadfile(writable=True) + def delete(self, filething, delete_v1=True, delete_v2=True): + """delete(filething=None, delete_v1=True, delete_v2=True) + + Remove tags from a file. + + Args: + filething (filething): A filename or `None` to use the one used + when loading. 
+ delete_v1 (bool): delete any ID3v1 tag + delete_v2 (bool): delete any ID3v2 tag + + If no filename is given, the one most recently loaded is used. + """ + + delete(filething, delete_v1, delete_v2) + self.clear() + + +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething, delete_v1=True, delete_v2=True): + """Remove tags from a file. + + Args: + delete_v1 (bool): delete any ID3v1 tag + delete_v2 (bool): delete any ID3v2 tag + + Raises: + mutagen.MutagenError: In case deleting failed + """ + + f = filething.fileobj + + if delete_v1: + tag, offset = find_id3v1(f) + if tag is not None: + f.seek(offset, 2) + f.truncate() + + # technically an insize=0 tag is invalid, but we delete it anyway + # (primarily because we used to write it) + if delete_v2: + f.seek(0, 0) + idata = f.read(10) + try: + id3, vmaj, vrev, flags, insize = struct.unpack('>3sBBB4s', idata) + except struct.error: + pass + else: + insize = BitPaddedInt(insize) + if id3 == b'ID3' and insize >= 0: + delete_bytes(f, insize + 10, 0) + + +class ID3FileType(mutagen.FileType): + """ID3FileType(filething, ID3=None, **kwargs) + + An unknown type of file with ID3 tags. + + Args: + filething (filething): A filename or file-like object + ID3 (ID3): An ID3 subclass to use for tags. + + Raises: + mutagen.MutagenError: In case loading the file failed + + Load stream and tag information from a file. + + A custom tag reader may be used in instead of the default + mutagen.id3.ID3 object, e.g. an EasyID3 reader. + """ + + __module__ = "mutagen.id3" + + ID3 = ID3 + + class _Info(mutagen.StreamInfo): + length = 0 + + def __init__(self, fileobj, offset): + pass + + @staticmethod + def pprint(): + return u"Unknown format with ID3 tag" + + @staticmethod + def score(filename, fileobj, header_data): + return header_data.startswith(b"ID3") + + def add_tags(self, ID3=None): + """Add an empty ID3 tag to the file. 
+ + Args: + ID3 (ID3): An ID3 subclass to use or `None` to use the one + that used when loading. + + A custom tag reader may be used in instead of the default + `ID3` object, e.g. an `mutagen.easyid3.EasyID3` reader. + """ + + if ID3 is None: + ID3 = self.ID3 + if self.tags is None: + self.ID3 = ID3 + self.tags = ID3() + else: + raise error("an ID3 tag already exists") + + @loadfile() + def load(self, filething, ID3=None, **kwargs): + # see __init__ for docs + + fileobj = filething.fileobj + + if ID3 is None: + ID3 = self.ID3 + else: + # If this was initialized with EasyID3, remember that for + # when tags are auto-instantiated in add_tags. + self.ID3 = ID3 + + try: + self.tags = ID3(fileobj, **kwargs) + except ID3NoHeaderError: + self.tags = None + + if self.tags is not None: + try: + offset = self.tags.size + except AttributeError: + offset = None + else: + offset = None + + self.info = self._Info(fileobj, offset) diff --git a/libs/mutagen/id3/_frames.py b/libs/mutagen/id3/_frames.py index 33ecf5cd..f50752aa 100644 --- a/libs/mutagen/id3/_frames.py +++ b/libs/mutagen/id3/_frames.py @@ -1,27 +1,25 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
import zlib from struct import unpack -from ._util import ID3JunkFrameError, ID3EncryptionUnsupportedError, unsynch -from ._specs import ( - BinaryDataSpec, StringSpec, Latin1TextSpec, EncodedTextSpec, ByteSpec, - EncodingSpec, ASPIIndexSpec, SizedIntegerSpec, IntegerSpec, - VolumeAdjustmentsSpec, VolumePeakSpec, VolumeAdjustmentSpec, - ChannelSpec, MultiSpec, SynchronizedTextSpec, KeyEventSpec, TimeStampSpec, - EncodedNumericPartTextSpec, EncodedNumericTextSpec, SpecError, - PictureTypeSpec) -from .._compat import text_type, string_types, swap_to_string, iteritems, izip - - -def is_valid_frame_id(frame_id): - return frame_id.isalnum() and frame_id.isupper() +from ._util import ID3JunkFrameError, ID3EncryptionUnsupportedError, unsynch, \ + ID3SaveConfig, error +from ._specs import BinaryDataSpec, StringSpec, Latin1TextSpec, \ + EncodedTextSpec, ByteSpec, EncodingSpec, ASPIIndexSpec, SizedIntegerSpec, \ + IntegerSpec, Encoding, VolumeAdjustmentsSpec, VolumePeakSpec, \ + VolumeAdjustmentSpec, ChannelSpec, MultiSpec, SynchronizedTextSpec, \ + KeyEventSpec, TimeStampSpec, EncodedNumericPartTextSpec, \ + EncodedNumericTextSpec, SpecError, PictureTypeSpec, ID3FramesSpec, \ + Latin1TextListSpec, CTOCFlagsSpec, FrameIDSpec, RVASpec +from .._compat import text_type, string_types, swap_to_string, iteritems, \ + izip, itervalues def _bytes2key(b): @@ -54,6 +52,7 @@ class Frame(object): FLAG24_DATALEN = 0x0001 _framespec = [] + _optionalspec = [] def __init__(self, *args, **kwargs): if len(args) == 1 and len(kwargs) == 0 and \ @@ -65,22 +64,63 @@ class Frame(object): for checker, val in izip(self._framespec, args): setattr(self, checker.name, val) for checker in self._framespec[len(args):]: - setattr(self, checker.name, kwargs.get(checker.name)) + setattr(self, checker.name, + kwargs.get(checker.name, checker.default)) + for spec in self._optionalspec: + if spec.name in kwargs: + setattr(self, spec.name, kwargs[spec.name]) + else: + break def __setattr__(self, name, value): 
for checker in self._framespec: if checker.name == name: - self.__dict__[name] = checker.validate(self, value) + self._setattr(name, checker.validate(self, value)) + return + for checker in self._optionalspec: + if checker.name == name: + self._setattr(name, checker.validate(self, value)) return super(Frame, self).__setattr__(name, value) + def _setattr(self, name, value): + self.__dict__[name] = value + def _to_other(self, other): # this impl covers subclasses with the same framespec if other._framespec is not self._framespec: raise ValueError for checker in other._framespec: - setattr(other, checker.name, getattr(self, checker.name)) + other._setattr(checker.name, getattr(self, checker.name)) + + # this impl covers subclasses with the same optionalspec + if other._optionalspec is not self._optionalspec: + raise ValueError + + for checker in other._optionalspec: + if hasattr(self, checker.name): + other._setattr(checker.name, getattr(self, checker.name)) + + def _merge_frame(self, other): + # default impl, use the new tag over the old one + return other + + def _upgrade_frame(self): + """Returns either this instance or a new instance if this is a v2.2 + frame and an upgrade to a v2.3/4 equivalent is viable. + + If this is a v2.2 instance and there is no upgrade path, returns None. + """ + + # turn 2.2 into 2.3/2.4 tags + if len(type(self).__name__) == 3: + base = type(self).__base__ + if base is Frame: + return + return base(self) + else: + return self def _get_v23_frame(self, **kwargs): """Returns a frame copy which is suitable for writing into a v2.3 tag. 
@@ -93,6 +133,13 @@ class Frame(object): name = checker.name value = getattr(self, name) new_kwargs[name] = checker._validate23(self, value, **kwargs) + + for checker in self._optionalspec: + name = checker.name + if hasattr(self, name): + value = getattr(self, name) + new_kwargs[name] = checker._validate23(self, value, **kwargs) + return type(self)(**new_kwargs) @property @@ -118,27 +165,64 @@ class Frame(object): # so repr works during __init__ if hasattr(self, attr.name): kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + for attr in self._optionalspec: + if hasattr(self, attr.name): + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) return '%s(%s)' % (type(self).__name__, ', '.join(kw)) - def _readData(self, data): + def _readData(self, id3, data): """Raises ID3JunkFrameError; Returns leftover data""" for reader in self._framespec: - if len(data): + if len(data) or reader.handle_nodata: try: - value, data = reader.read(self, data) + value, data = reader.read(id3, self, data) except SpecError as e: raise ID3JunkFrameError(e) else: raise ID3JunkFrameError("no data left") - setattr(self, reader.name, value) + self._setattr(reader.name, value) + + for reader in self._optionalspec: + if len(data) or reader.handle_nodata: + try: + value, data = reader.read(id3, self, data) + except SpecError as e: + raise ID3JunkFrameError(e) + else: + break + self._setattr(reader.name, value) return data - def _writeData(self): + def _writeData(self, config=None): + """Raises error""" + + if config is None: + config = ID3SaveConfig() + + if config.v2_version == 3: + frame = self._get_v23_frame(sep=config.v23_separator) + else: + frame = self + data = [] for writer in self._framespec: - data.append(writer.write(self, getattr(self, writer.name))) + try: + data.append( + writer.write(config, frame, getattr(frame, writer.name))) + except SpecError as e: + raise error(e) + + for writer in self._optionalspec: + try: + data.append( + writer.write(config, frame, 
getattr(frame, writer.name))) + except AttributeError: + break + except SpecError as e: + raise error(e) + return b''.join(data) def pprint(self): @@ -149,7 +233,7 @@ class Frame(object): return "[unrepresentable data]" @classmethod - def _fromData(cls, id3, tflags, data): + def _fromData(cls, header, tflags, data): """Construct this ID3 frame from raw string data. Raises: @@ -159,7 +243,7 @@ class Frame(object): ID3EncryptionUnsupportedError in case the frame is encrypted. """ - if id3.version >= id3._V24: + if header.version >= header._V24: if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN): # The data length int is syncsafe in 2.4 (but not 2.3). # However, we don't actually need the data length int, @@ -167,7 +251,7 @@ class Frame(object): # all we need are the raw bytes. datalen_bytes = data[:4] data = data[4:] - if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch: + if tflags & Frame.FLAG24_UNSYNCH or header.f_unsynch: try: data = unsynch.decode(data) except ValueError: @@ -181,7 +265,7 @@ class Frame(object): if tflags & Frame.FLAG24_COMPRESS: try: data = zlib.decompress(data) - except zlib.error as err: + except zlib.error: # the initial mutagen that went out with QL 0.12 did not # write the 4 bytes of uncompressed size. Compensate. data = datalen_bytes + data @@ -191,7 +275,7 @@ class Frame(object): raise ID3JunkFrameError( 'zlib: %s: %r' % (err, data)) - elif id3.version >= id3._V23: + elif header.version >= header._V23: if tflags & Frame.FLAG23_COMPRESS: usize, = unpack('>L', data[:4]) data = data[4:] @@ -204,93 +288,93 @@ class Frame(object): raise ID3JunkFrameError('zlib: %s: %r' % (err, data)) frame = cls() - frame._readData(data) + frame._readData(header, data) return frame def __hash__(self): raise TypeError("Frame objects are unhashable") -class FrameOpt(Frame): - """A frame with optional parts. +class CHAP(Frame): + """Chapter""" - Some ID3 frames have optional data; this class extends Frame to - provide support for those parts. 
- """ + _framespec = [ + Latin1TextSpec("element_id"), + SizedIntegerSpec("start_time", 4, default=0), + SizedIntegerSpec("end_time", 4, default=0), + SizedIntegerSpec("start_offset", 4, default=0xffffffff), + SizedIntegerSpec("end_offset", 4, default=0xffffffff), + ID3FramesSpec("sub_frames"), + ] - _optionalspec = [] + @property + def HashKey(self): + return '%s:%s' % (self.FrameID, self.element_id) - def __init__(self, *args, **kwargs): - super(FrameOpt, self).__init__(*args, **kwargs) - for spec in self._optionalspec: - if spec.name in kwargs: - setattr(self, spec.name, kwargs[spec.name]) - else: - break + def __eq__(self, other): + if not isinstance(other, CHAP): + return False - def __setattr__(self, name, value): - for checker in self._optionalspec: - if checker.name == name: - self.__dict__[name] = checker.validate(self, value) - return - super(FrameOpt, self).__setattr__(name, value) + self_frames = self.sub_frames or {} + other_frames = other.sub_frames or {} + if sorted(self_frames.values()) != sorted(other_frames.values()): + return False - def _to_other(self, other): - super(FrameOpt, self)._to_other(other) + return self.element_id == other.element_id and \ + self.start_time == other.start_time and \ + self.end_time == other.end_time and \ + self.start_offset == other.start_offset and \ + self.end_offset == other.end_offset - # this impl covers subclasses with the same optionalspec - if other._optionalspec is not self._optionalspec: - raise ValueError + __hash__ = Frame.__hash__ - for checker in other._optionalspec: - if hasattr(self, checker.name): - setattr(other, checker.name, getattr(self, checker.name)) + def _pprint(self): + frame_pprint = u"" + for frame in itervalues(self.sub_frames): + for line in frame.pprint().splitlines(): + frame_pprint += "\n" + " " * 4 + line + return u"%s time=%d..%d offset=%d..%d%s" % ( + self.element_id, self.start_time, self.end_time, + self.start_offset, self.end_offset, frame_pprint) - def _readData(self, data): - 
"""Raises ID3JunkFrameError; Returns leftover data""" - for reader in self._framespec: - if len(data): - try: - value, data = reader.read(self, data) - except SpecError as e: - raise ID3JunkFrameError(e) - else: - raise ID3JunkFrameError("no data left") - setattr(self, reader.name, value) +class CTOC(Frame): + """Table of contents""" - if data: - for reader in self._optionalspec: - if len(data): - try: - value, data = reader.read(self, data) - except SpecError as e: - raise ID3JunkFrameError(e) - else: - break - setattr(self, reader.name, value) + _framespec = [ + Latin1TextSpec("element_id"), + CTOCFlagsSpec("flags", default=0), + Latin1TextListSpec("child_element_ids"), + ID3FramesSpec("sub_frames"), + ] - return data + @property + def HashKey(self): + return '%s:%s' % (self.FrameID, self.element_id) - def _writeData(self): - data = [] - for writer in self._framespec: - data.append(writer.write(self, getattr(self, writer.name))) - for writer in self._optionalspec: - try: - data.append(writer.write(self, getattr(self, writer.name))) - except AttributeError: - break - return b''.join(data) + __hash__ = Frame.__hash__ - def __repr__(self): - kw = [] - for attr in self._framespec: - kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) - for attr in self._optionalspec: - if hasattr(self, attr.name): - kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) - return '%s(%s)' % (type(self).__name__, ', '.join(kw)) + def __eq__(self, other): + if not isinstance(other, CTOC): + return False + + self_frames = self.sub_frames or {} + other_frames = other.sub_frames or {} + if sorted(self_frames.values()) != sorted(other_frames.values()): + return False + + return self.element_id == other.element_id and \ + self.flags == other.flags and \ + self.child_element_ids == other.child_element_ids + + def _pprint(self): + frame_pprint = u"" + if getattr(self, "sub_frames", None): + frame_pprint += "\n" + "\n".join( + [" " * 4 + f.pprint() for f in self.sub_frames.values()]) 
+ return u"%s flags=%d child_element_ids=%s%s" % ( + self.element_id, int(self.flags), + u",".join(self.child_element_ids), frame_pprint) @swap_to_string @@ -310,8 +394,8 @@ class TextFrame(Frame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] def __bytes__(self): @@ -345,6 +429,14 @@ class TextFrame(Frame): return self.text.extend(value) + def _merge_frame(self, other): + # merge in new values + for val in other[:]: + if val not in self: + self.append(val) + self.encoding = max(self.encoding, other.encoding) + return self + def _pprint(self): return " / ".join(self.text) @@ -359,8 +451,9 @@ class NumericTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000', + default=[]), ] def __pos__(self): @@ -379,8 +472,9 @@ class NumericPartTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000', + default=[]), ] def __pos__(self): @@ -396,8 +490,8 @@ class TimeStampTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', TimeStampSpec('stamp'), sep=u','), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', TimeStampSpec('stamp'), sep=u',', default=[]), ] def __bytes__(self): @@ -423,7 +517,9 @@ class UrlFrame(Frame): ASCII. 
""" - _framespec = [Latin1TextSpec('url')] + _framespec = [ + Latin1TextSpec('url'), + ] def __bytes__(self): return self.url.encode('utf-8') @@ -550,6 +646,38 @@ class TDES(TextFrame): "iTunes Podcast Description" +class TKWD(TextFrame): + "iTunes Podcast Keywords" + + +class TCAT(TextFrame): + "iTunes Podcast Category" + + +class MVNM(TextFrame): + "iTunes Movement Name" + + +class MVN(MVNM): + "iTunes Movement Name" + + +class MVIN(NumericPartTextFrame): + "iTunes Movement Number/Count" + + +class MVI(MVIN): + "iTunes Movement Number/Count" + + +class GRP1(TextFrame): + "iTunes Grouping" + + +class GP1(GRP1): + "iTunes Grouping" + + class TDOR(TimeStampTextFrame): "Original Release Time" @@ -741,7 +869,7 @@ class TXXX(TextFrame): _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] @property @@ -795,7 +923,7 @@ class WXXX(UrlFrame): """ _framespec = [ - EncodingSpec('encoding'), + EncodingSpec('encoding', default=Encoding.UTF16), EncodedTextSpec('desc'), Latin1TextSpec('url'), ] @@ -818,10 +946,10 @@ class PairedTextFrame(Frame): """ _framespec = [ - EncodingSpec('encoding'), + EncodingSpec('encoding', default=Encoding.UTF16), MultiSpec('people', EncodedTextSpec('involvement'), - EncodedTextSpec('person')) + EncodedTextSpec('person'), default=[]) ] def __eq__(self, other): @@ -848,7 +976,9 @@ class BinaryFrame(Frame): The 'data' attribute contains the raw byte string. 
""" - _framespec = [BinaryDataSpec('data')] + _framespec = [ + BinaryDataSpec('data'), + ] def __eq__(self, other): return self.data == other @@ -864,8 +994,8 @@ class ETCO(Frame): """Event timing codes.""" _framespec = [ - ByteSpec("format"), - KeyEventSpec("events"), + ByteSpec("format", default=1), + KeyEventSpec("events", default=[]), ] def __eq__(self, other): @@ -882,11 +1012,11 @@ class MLLT(Frame): """ _framespec = [ - SizedIntegerSpec('frames', 2), - SizedIntegerSpec('bytes', 3), - SizedIntegerSpec('milliseconds', 3), - ByteSpec('bits_for_bytes'), - ByteSpec('bits_for_milliseconds'), + SizedIntegerSpec('frames', size=2, default=0), + SizedIntegerSpec('bytes', size=3, default=0), + SizedIntegerSpec('milliseconds', size=3, default=0), + ByteSpec('bits_for_bytes', default=0), + ByteSpec('bits_for_milliseconds', default=0), BinaryDataSpec('data'), ] @@ -904,7 +1034,7 @@ class SYTC(Frame): """ _framespec = [ - ByteSpec("format"), + ByteSpec("format", default=1), BinaryDataSpec("data"), ] @@ -923,8 +1053,8 @@ class USLT(Frame): """ _framespec = [ - EncodingSpec('encoding'), - StringSpec('lang', 3), + EncodingSpec('encoding', default=Encoding.UTF16), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('desc'), EncodedTextSpec('text'), ] @@ -944,6 +1074,9 @@ class USLT(Frame): __hash__ = Frame.__hash__ + def _pprint(self): + return "%s=%s=%s" % (self.desc, self.lang, self.text) + @swap_to_string class SYLT(Frame): @@ -951,9 +1084,9 @@ class SYLT(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), - ByteSpec('format'), - ByteSpec('type'), + StringSpec('lang', length=3, default=u"XXX"), + ByteSpec('format', default=1), + ByteSpec('type', default=0), EncodedTextSpec('desc'), SynchronizedTextSpec('text'), ] @@ -983,9 +1116,9 @@ class COMM(TextFrame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), + StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), 
sep=u'\u0000'), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] @property @@ -1015,9 +1148,9 @@ class RVA2(Frame): _framespec = [ Latin1TextSpec('desc'), - ChannelSpec('channel'), - VolumeAdjustmentSpec('gain'), - VolumePeakSpec('peak'), + ChannelSpec('channel', default=1), + VolumeAdjustmentSpec('gain', default=1), + VolumePeakSpec('peak', default=1), ] _channels = ["Other", "Master volume", "Front right", "Front left", @@ -1055,9 +1188,9 @@ class EQU2(Frame): """ _framespec = [ - ByteSpec("method"), + ByteSpec("method", default=0), Latin1TextSpec("desc"), - VolumeAdjustmentsSpec("adjustments"), + VolumeAdjustmentsSpec("adjustments", default=[]), ] def __eq__(self, other): @@ -1070,7 +1203,21 @@ class EQU2(Frame): return '%s:%s' % (self.FrameID, self.desc) -# class RVAD: unsupported +class RVAD(Frame): + """Relative volume adjustment""" + + _framespec = [ + RVASpec("adjustments", stereo_only=False), + ] + + __hash__ = Frame.__hash__ + + def __eq__(self, other): + if not isinstance(other, RVAD): + return False + return self.adjustments == other.adjustments + + # class EQUA: unsupported @@ -1078,16 +1225,16 @@ class RVRB(Frame): """Reverb.""" _framespec = [ - SizedIntegerSpec('left', 2), - SizedIntegerSpec('right', 2), - ByteSpec('bounce_left'), - ByteSpec('bounce_right'), - ByteSpec('feedback_ltl'), - ByteSpec('feedback_ltr'), - ByteSpec('feedback_rtr'), - ByteSpec('feedback_rtl'), - ByteSpec('premix_ltr'), - ByteSpec('premix_rtl'), + SizedIntegerSpec('left', size=2, default=0), + SizedIntegerSpec('right', size=2, default=0), + ByteSpec('bounce_left', default=0), + ByteSpec('bounce_right', default=0), + ByteSpec('feedback_ltl', default=0), + ByteSpec('feedback_ltr', default=0), + ByteSpec('feedback_rtr', default=0), + ByteSpec('feedback_rtl', default=0), + ByteSpec('premix_ltr', default=0), + ByteSpec('premix_rtl', default=0), ] def __eq__(self, other): @@ -1127,15 +1274,17 @@ class APIC(Frame): def HashKey(self): return '%s:%s' % 
(self.FrameID, self.desc) - def _validate_from_22(self, other, checker): - if checker.name == "mime": - self.mime = other.mime.decode("ascii", "ignore") - else: - super(APIC, self)._validate_from_22(other, checker) + def _merge_frame(self, other): + other.desc += u" " + return other def _pprint(self): - return "%s (%s, %d bytes)" % ( - self.desc, self.mime, len(self.data)) + type_desc = text_type(self.type) + if hasattr(self.type, "_pprint"): + type_desc = self.type._pprint() + + return "%s, %s (%s, %d bytes)" % ( + type_desc, self.desc, self.mime, len(self.data)) class PCNT(Frame): @@ -1147,7 +1296,9 @@ class PCNT(Frame): This frame is basically obsoleted by POPM. """ - _framespec = [IntegerSpec('count')] + _framespec = [ + IntegerSpec('count', default=0), + ] def __eq__(self, other): return self.count == other @@ -1161,7 +1312,26 @@ class PCNT(Frame): return text_type(self.count) -class POPM(FrameOpt): +class PCST(Frame): + """iTunes Podcast Flag""" + + _framespec = [ + IntegerSpec('value', default=0), + ] + + def __eq__(self, other): + return self.value == other + + __hash__ = Frame.__hash__ + + def __pos__(self): + return self.value + + def _pprint(self): + return text_type(self.value) + + +class POPM(Frame): """Popularimeter. This frame keys a rating (out of 255) and a play count to an email @@ -1176,10 +1346,12 @@ class POPM(FrameOpt): _framespec = [ Latin1TextSpec('email'), - ByteSpec('rating'), + ByteSpec('rating', default=0), ] - _optionalspec = [IntegerSpec('count')] + _optionalspec = [ + IntegerSpec('count', default=0), + ] @property def HashKey(self): @@ -1188,7 +1360,7 @@ class POPM(FrameOpt): def __eq__(self, other): return self.rating == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ def __pos__(self): return self.rating @@ -1230,7 +1402,7 @@ class GEOB(Frame): __hash__ = Frame.__hash__ -class RBUF(FrameOpt): +class RBUF(Frame): """Recommended buffer size. 
Attributes: @@ -1242,24 +1414,26 @@ class RBUF(FrameOpt): Mutagen will not find the next tag itself. """ - _framespec = [SizedIntegerSpec('size', 3)] + _framespec = [ + SizedIntegerSpec('size', size=3, default=0), + ] _optionalspec = [ - ByteSpec('info'), - SizedIntegerSpec('offset', 4), + ByteSpec('info', default=0), + SizedIntegerSpec('offset', size=4, default=0), ] def __eq__(self, other): return self.size == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ def __pos__(self): return self.size @swap_to_string -class AENC(FrameOpt): +class AENC(Frame): """Audio encryption. Attributes: @@ -1274,12 +1448,11 @@ class AENC(FrameOpt): _framespec = [ Latin1TextSpec('owner'), - SizedIntegerSpec('preview_start', 2), - SizedIntegerSpec('preview_length', 2), + SizedIntegerSpec('preview_start', size=2, default=0), + SizedIntegerSpec('preview_length', size=2, default=0), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): return '%s:%s' % (self.FrameID, self.owner) @@ -1293,10 +1466,10 @@ class AENC(FrameOpt): def __eq__(self, other): return self.owner == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ -class LINK(FrameOpt): +class LINK(Frame): """Linked information. 
Attributes: @@ -1307,27 +1480,20 @@ class LINK(FrameOpt): """ _framespec = [ - StringSpec('frameid', 4), + FrameIDSpec('frameid', length=4), Latin1TextSpec('url'), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): - try: - return "%s:%s:%s:%s" % ( - self.FrameID, self.frameid, self.url, _bytes2key(self.data)) - except AttributeError: - return "%s:%s:%s" % (self.FrameID, self.frameid, self.url) + return "%s:%s:%s:%s" % ( + self.FrameID, self.frameid, self.url, _bytes2key(self.data)) def __eq__(self, other): - try: - return (self.frameid, self.url, self.data) == other - except AttributeError: - return (self.frameid, self.url) == other + return (self.frameid, self.url, self.data) == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ class POSS(Frame): @@ -1340,8 +1506,8 @@ class POSS(Frame): """ _framespec = [ - ByteSpec('format'), - IntegerSpec('position'), + ByteSpec('format', default=1), + IntegerSpec('position', default=0), ] def __pos__(self): @@ -1396,7 +1562,7 @@ class USER(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('text'), ] @@ -1426,7 +1592,7 @@ class OWNE(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('date', 8), + StringSpec('date', length=8, default=u"19700101"), EncodedTextSpec('seller'), ] @@ -1442,15 +1608,15 @@ class OWNE(Frame): __hash__ = Frame.__hash__ -class COMR(FrameOpt): +class COMR(Frame): """Commercial frame.""" _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('valid_until', 8), + StringSpec('valid_until', length=8, default=u"19700101"), Latin1TextSpec('contact'), - ByteSpec('format'), + ByteSpec('format', default=0), EncodedTextSpec('seller'), EncodedTextSpec('desc'), ] @@ -1467,7 +1633,7 @@ class COMR(FrameOpt): def __eq__(self, other): return self._writeData() == other._writeData() - __hash__ = FrameOpt.__hash__ 
+ __hash__ = Frame.__hash__ @swap_to_string @@ -1480,7 +1646,7 @@ class ENCR(Frame): _framespec = [ Latin1TextSpec('owner'), - ByteSpec('method'), + ByteSpec('method', default=0x80), BinaryDataSpec('data'), ] @@ -1498,16 +1664,15 @@ class ENCR(Frame): @swap_to_string -class GRID(FrameOpt): +class GRID(Frame): """Group identification registration.""" _framespec = [ Latin1TextSpec('owner'), - ByteSpec('group'), + ByteSpec('group', default=0x80), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): return '%s:%s' % (self.FrameID, self.group) @@ -1524,7 +1689,7 @@ class GRID(FrameOpt): def __eq__(self, other): return self.owner == other or self.group == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ @swap_to_string @@ -1558,7 +1723,7 @@ class SIGN(Frame): """Signature frame.""" _framespec = [ - ByteSpec('group'), + ByteSpec('group', default=0x80), BinaryDataSpec('sig'), ] @@ -1581,7 +1746,9 @@ class SEEK(Frame): Mutagen does not find tags at seek offsets. """ - _framespec = [IntegerSpec('offset')] + _framespec = [ + IntegerSpec('offset', default=0), + ] def __pos__(self): return self.offset @@ -1598,12 +1765,13 @@ class ASPI(Frame): Attributes: S, L, N, b, and Fi. For the meaning of these, see the ID3v2.4 specification. Fi is a list of integers. 
""" + _framespec = [ - SizedIntegerSpec("S", 4), - SizedIntegerSpec("L", 4), - SizedIntegerSpec("N", 2), - ByteSpec("b"), - ASPIIndexSpec("Fi"), + SizedIntegerSpec("S", size=4, default=0), + SizedIntegerSpec("L", size=4, default=0), + SizedIntegerSpec("N", size=2, default=0), + ByteSpec("b", default=0), + ASPIIndexSpec("Fi", default=[]), ] def __eq__(self, other): @@ -1721,6 +1889,26 @@ class TEN(TENC): "Encoder" +class TST(TSOT): + "Title Sort Order key" + + +class TSA(TSOA): + "Album Sort Order key" + + +class TS2(TSO2): + "iTunes Album Artist Sort" + + +class TSP(TSOP): + "Perfomer Sort Order key" + + +class TSC(TSOC): + "iTunes Composer Sort" + + class TSS(TSSE): "Encoder settings" @@ -1825,7 +2013,19 @@ class COM(COMM): "Comment" -# class RVA(RVAD) +class RVA(RVAD): + "Relative volume adjustment" + + _framespec = [ + RVASpec("adjustments", stereo_only=True), + ] + + def _to_other(self, other): + if not isinstance(other, RVAD): + raise TypeError + + other.adjustments = list(self.adjustments) + # class EQU(EQUA) @@ -1842,10 +2042,10 @@ class PIC(APIC): _framespec = [ EncodingSpec('encoding'), - StringSpec('mime', 3), + StringSpec('mime', length=3, default="JPG"), PictureTypeSpec('type'), EncodedTextSpec('desc'), - BinaryDataSpec('data') + BinaryDataSpec('data'), ] def _to_other(self, other): @@ -1877,8 +2077,12 @@ class BUF(RBUF): class CRM(Frame): """Encrypted meta frame""" - _framespec = [Latin1TextSpec('owner'), Latin1TextSpec('desc'), - BinaryDataSpec('data')] + + _framespec = [ + Latin1TextSpec('owner'), + Latin1TextSpec('desc'), + BinaryDataSpec('data'), + ] def __eq__(self, other): return self.data == other @@ -1893,26 +2097,28 @@ class LNK(LINK): """Linked information""" _framespec = [ - StringSpec('frameid', 3), - Latin1TextSpec('url') + FrameIDSpec('frameid', length=3), + Latin1TextSpec('url'), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - def _to_other(self, other): if not isinstance(other, LINK): raise TypeError if 
isinstance(other, LNK): - other.frameid = self.frameid + new_frameid = self.frameid else: try: - other.frameid = Frames_2_2[self.frameid].__bases__[0].__name__ + new_frameid = Frames_2_2[self.frameid].__bases__[0].__name__ except KeyError: - other.frameid = self.frameid.ljust(4) + new_frameid = self.frameid.ljust(4) + + # we could end up with invalid IDs here, so bypass the validation + other._setattr("frameid", new_frameid) + other.url = self.url - if hasattr(self, "data"): - other.data = self.data + other.data = self.data Frames = {} diff --git a/libs/mutagen/id3/_id3v1.py b/libs/mutagen/id3/_id3v1.py new file mode 100644 index 00000000..d41d00d0 --- /dev/null +++ b/libs/mutagen/id3/_id3v1.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2005 Michael Urman +# 2006 Lukas Lalinsky +# 2013 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import errno +from struct import error as StructError, unpack + +from mutagen._util import chr_, text_type + +from ._frames import TCON, TRCK, COMM, TDRC, TALB, TPE1, TIT2 + + +def find_id3v1(fileobj): + """Returns a tuple of (id3tag, offset_to_end) or (None, 0) + + offset mainly because we used to write too short tags in some cases and + we need the offset to delete them. + """ + + # id3v1 is always at the end (after apev2) + + extra_read = b"APETAGEX".index(b"TAG") + + try: + fileobj.seek(-128 - extra_read, 2) + except IOError as e: + if e.errno == errno.EINVAL: + # If the file is too small, might be ok since we wrote too small + # tags at some point. let's see how the parsing goes.. 
+ fileobj.seek(0, 0) + else: + raise + + data = fileobj.read(128 + extra_read) + try: + idx = data.index(b"TAG") + except ValueError: + return (None, 0) + else: + # FIXME: make use of the apev2 parser here + # if TAG is part of APETAGEX assume this is an APEv2 tag + try: + ape_idx = data.index(b"APETAGEX") + except ValueError: + pass + else: + if idx == ape_idx + extra_read: + return (None, 0) + + tag = ParseID3v1(data[idx:]) + if tag is None: + return (None, 0) + + offset = idx - len(data) + return (tag, offset) + + +# ID3v1.1 support. +def ParseID3v1(data): + """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. + + Returns a {frame_name: frame} dict or None. + """ + + try: + data = data[data.index(b"TAG"):] + except ValueError: + return None + if 128 < len(data) or len(data) < 124: + return None + + # Issue #69 - Previous versions of Mutagen, when encountering + # out-of-spec TDRC and TYER frames of less than four characters, + # wrote only the characters available - e.g. "1" or "" - into the + # year field. To parse those, reduce the size of the year field. + # Amazingly, "0s" works as a struct format string. 
+ unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124) + + try: + tag, title, artist, album, year, comment, track, genre = unpack( + unpack_fmt, data) + except StructError: + return None + + if tag != b"TAG": + return None + + def fix(data): + return data.split(b"\x00")[0].strip().decode('latin1') + + title, artist, album, year, comment = map( + fix, [title, artist, album, year, comment]) + + frames = {} + if title: + frames["TIT2"] = TIT2(encoding=0, text=title) + if artist: + frames["TPE1"] = TPE1(encoding=0, text=[artist]) + if album: + frames["TALB"] = TALB(encoding=0, text=album) + if year: + frames["TDRC"] = TDRC(encoding=0, text=year) + if comment: + frames["COMM"] = COMM( + encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) + # Don't read a track number if it looks like the comment was + # padded with spaces instead of nulls (thanks, WinAmp). + if track and ((track != 32) or (data[-3] == b'\x00'[0])): + frames["TRCK"] = TRCK(encoding=0, text=str(track)) + if genre != 255: + frames["TCON"] = TCON(encoding=0, text=str(genre)) + return frames + + +def MakeID3v1(id3): + """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.""" + + v1 = {} + + for v2id, name in {"TIT2": "title", "TPE1": "artist", + "TALB": "album"}.items(): + if v2id in id3: + text = id3[v2id].text[0].encode('latin1', 'replace')[:30] + else: + text = b"" + v1[name] = text + (b"\x00" * (30 - len(text))) + + if "COMM" in id3: + cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28] + else: + cmnt = b"" + v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt))) + + if "TRCK" in id3: + try: + v1["track"] = chr_(+id3["TRCK"]) + except ValueError: + v1["track"] = b"\x00" + else: + v1["track"] = b"\x00" + + if "TCON" in id3: + try: + genre = id3["TCON"].genres[0] + except IndexError: + pass + else: + if genre in TCON.GENRES: + v1["genre"] = chr_(TCON.GENRES.index(genre)) + if "genre" not in v1: + v1["genre"] = b"\xff" + + if "TDRC" in id3: + year = 
text_type(id3["TDRC"]).encode('ascii') + elif "TYER" in id3: + year = text_type(id3["TYER"]).encode('ascii') + else: + year = b"" + v1["year"] = (year + b"\x00\x00\x00\x00")[:4] + + return ( + b"TAG" + + v1["title"] + + v1["artist"] + + v1["album"] + + v1["year"] + + v1["comment"] + + v1["track"] + + v1["genre"] + ) diff --git a/libs/mutagen/id3/_specs.py b/libs/mutagen/id3/_specs.py index 22e4335b..63784333 100644 --- a/libs/mutagen/id3/_specs.py +++ b/libs/mutagen/id3/_specs.py @@ -1,18 +1,20 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct +import codecs from struct import unpack, pack from .._compat import text_type, chr_, PY3, swap_to_string, string_types, \ xrange -from .._util import total_ordering, decode_terminated, enum, izip -from ._util import BitPaddedInt +from .._util import total_ordering, decode_terminated, enum, izip, flags, \ + cdata, encode_endian, intround +from ._util import BitPaddedInt, is_valid_frame_id @enum @@ -84,6 +86,19 @@ class PictureType(object): PUBLISHER_LOGOTYPE = 20 """Publisher/Studio logotype""" + def _pprint(self): + return text_type(self).split(".", 1)[-1].lower().replace("_", " ") + + +@flags +class CTOCFlags(object): + + TOP_LEVEL = 0x2 + """Identifies the CTOC root frame""" + + ORDERED = 0x1 + """Child elements are ordered""" + class SpecError(Exception): pass @@ -91,8 +106,14 @@ class SpecError(Exception): class Spec(object): - def __init__(self, name): + handle_nodata = False + """If reading empty data is possible and writing it back will again + result in no data. 
+ """ + + def __init__(self, name, default): self.name = name + self.default = default def __hash__(self): raise TypeError("Spec objects are unhashable") @@ -104,25 +125,46 @@ class Spec(object): return value - def read(self, frame, data): - """Returns the (value, left_data) or raises SpecError""" + def read(self, header, frame, data): + """ + Returns: + (value: object, left_data: bytes) + Raises: + SpecError + """ raise NotImplementedError - def write(self, frame, value): + def write(self, config, frame, value): + """ + Returns: + bytes: The serialized data + Raises: + SpecError + """ raise NotImplementedError def validate(self, frame, value): - """Returns the validated data or raises ValueError/TypeError""" + """ + Returns: + the validated value + Raises: + ValueError + TypeError + """ raise NotImplementedError class ByteSpec(Spec): - def read(self, frame, data): + + def __init__(self, name, default=0): + super(ByteSpec, self).__init__(name, default) + + def read(self, header, frame, data): return bytearray(data)[0], data[1:] - def write(self, frame, value): + def write(self, config, frame, value): return chr_(value) def validate(self, frame, value): @@ -133,8 +175,11 @@ class ByteSpec(Spec): class PictureTypeSpec(ByteSpec): - def read(self, frame, data): - value, data = ByteSpec.read(self, frame, data) + def __init__(self, name, default=PictureType.COVER_FRONT): + super(PictureTypeSpec, self).__init__(name, default) + + def read(self, header, frame, data): + value, data = ByteSpec.read(self, header, frame, data) return PictureType(value), data def validate(self, frame, value): @@ -144,11 +189,24 @@ class PictureTypeSpec(ByteSpec): return value +class CTOCFlagsSpec(ByteSpec): + + def read(self, header, frame, data): + value, data = ByteSpec.read(self, header, frame, data) + return CTOCFlags(value), data + + def validate(self, frame, value): + value = ByteSpec.validate(self, frame, value) + if value is not None: + return CTOCFlags(value) + return value + + class 
IntegerSpec(Spec): - def read(self, frame, data): + def read(self, header, frame, data): return int(BitPaddedInt(data, bits=8)), b'' - def write(self, frame, value): + def write(self, config, frame, value): return BitPaddedInt.to_str(value, bits=8, width=-1) def validate(self, frame, value): @@ -156,13 +214,15 @@ class IntegerSpec(Spec): class SizedIntegerSpec(Spec): - def __init__(self, name, size): - self.name, self.__sz = name, size - def read(self, frame, data): + def __init__(self, name, size, default): + self.name, self.__sz = name, size + self.default = default + + def read(self, header, frame, data): return int(BitPaddedInt(data[:self.__sz], bits=8)), data[self.__sz:] - def write(self, frame, value): + def write(self, config, frame, value): return BitPaddedInt.to_str(value, bits=8, width=self.__sz) def validate(self, frame, value): @@ -188,8 +248,11 @@ class Encoding(object): class EncodingSpec(ByteSpec): - def read(self, frame, data): - enc, data = super(EncodingSpec, self).read(frame, data) + def __init__(self, name, default=Encoding.UTF16): + super(EncodingSpec, self).__init__(name, default) + + def read(self, header, frame, data): + enc, data = super(EncodingSpec, self).read(header, frame, data) if enc not in (Encoding.LATIN1, Encoding.UTF16, Encoding.UTF16BE, Encoding.UTF8): raise SpecError('Invalid Encoding: %r' % enc) @@ -197,7 +260,7 @@ class EncodingSpec(ByteSpec): def validate(self, frame, value): if value is None: - return None + raise TypeError if value not in (Encoding.LATIN1, Encoding.UTF16, Encoding.UTF16BE, Encoding.UTF8): raise ValueError('Invalid Encoding: %r' % value) @@ -213,11 +276,13 @@ class EncodingSpec(ByteSpec): class StringSpec(Spec): """A fixed size ASCII only payload.""" - def __init__(self, name, length): - super(StringSpec, self).__init__(name) + def __init__(self, name, length, default=None): + if default is None: + default = u" " * length + super(StringSpec, self).__init__(name, default) self.len = length - def read(s, 
class RVASpec(Spec):
    """Spec for the body of the old RVA/RVAD relative-volume frames.

    Values are a flat list of integers (adjustment/peak values per
    channel); the sign of each value is carried by the per-value
    increment/decrement flag bits on disk.
    """

    def __init__(self, name, stereo_only, default=[0, 0]):
        # RVA (v2.2) only has 2 channels -> at most 4 values, while
        # RVAD (v2.3) has up to 6 channels -> at most 12 values.
        # NOTE(review): the mutable default list is shared across
        # instances; it is only stored here, never mutated.
        super(RVASpec, self).__init__(name, default)
        self._max_values = 4 if stereo_only else 12

    def read(self, header, frame, data):
        """Returns (list of ints, leftover bytes) or raises SpecError."""

        # inc/dec flags: one bit per stored value, 1 = increment,
        # 0 = decrement
        spec = ByteSpec("flags", 0)
        flags, data = spec.read(header, frame, data)
        if not data:
            raise SpecError("truncated")

        # how many bytes per value
        bits, data = spec.read(header, frame, data)
        if bits == 0:
            # not allowed according to spec
            raise SpecError("bits used has to be > 0")
        bytes_per_value = (bits + 7) // 8

        # Read fixed-width values until the data or the channel budget
        # runs out; leftover bytes are handed back to the caller.
        values = []
        while len(data) >= bytes_per_value and len(values) < self._max_values:
            v = BitPaddedInt(data[:bytes_per_value], bits=8)
            data = data[bytes_per_value:]
            values.append(v)

        if len(values) < 2:
            raise SpecError("First two values not optional")

        # if the respective flag bit is zero, take as decrement
        # (negate the value). The index list skips some slots —
        # presumably the peak-value positions of the RVAD layout;
        # TODO confirm against the id3v2.3 RVAD spec.
        for bit, index in enumerate([0, 1, 4, 5, 8, 10]):
            if not cdata.test_bit(flags, bit):
                try:
                    values[index] = -values[index]
                except IndexError:
                    break

        return values, data

    def write(self, config, frame, values):
        """Serializes the value list; raises SpecError when invalid."""

        if len(values) < 2 or len(values) > self._max_values:
            raise SpecError(
                "at least two volume change values required, max %d" %
                self._max_values)

        spec = ByteSpec("flags", 0)

        # Rebuild the inc/dec flag byte: non-negative values set their
        # bit (increment), negative ones are stored as absolute values.
        flags = 0
        values = list(values)
        for bit, index in enumerate([0, 1, 4, 5, 8, 10]):
            try:
                if values[index] < 0:
                    values[index] = -values[index]
                else:
                    flags |= (1 << bit)
            except IndexError:
                break

        buffer_ = bytearray()
        buffer_.extend(spec.write(config, frame, flags))

        # serialized and make them all the same size (min 2 bytes)
        byte_values = [
            BitPaddedInt.to_str(v, bits=8, width=-1, minwidth=2)
            for v in values]
        max_bytes = max([len(v) for v in byte_values])
        byte_values = [v.ljust(max_bytes, b"\x00") for v in byte_values]

        bits = max_bytes * 8
        buffer_.extend(spec.write(config, frame, bits))

        for v in byte_values:
            buffer_.extend(v)

        return bytes(buffer_)

    def validate(self, frame, values):
        if len(values) < 2 or len(values) > self._max_values:
            raise ValueError("needs list of length 2..%d" % self._max_values)
        return values
def iter_text_fixups(data, encoding):
    """Yields a series of repaired text values for decoding"""

    # First candidate: the data exactly as stored.
    yield data
    if encoding == Encoding.UTF16:
        # wrong termination
        yield data + b"\x00"
        # utf-16 is missing its BOM; content is usually utf-16-le
        bom = codecs.BOM_UTF16_LE
        yield bom + data
        # both problems combined
        yield bom + data + b"\x00"
    elif encoding == Encoding.UTF16BE:
        # wrong termination
        yield data + b"\x00"
+ # https://github.com/quodlibet/mutagen/issues/276 + if header.version < header._V24 and not data.strip(b"\x00"): + data = b"" + return value, data + raise SpecError(err) + + def write(self, config, frame, value): enc, term = self._encodings[frame.encoding] try: - # allow missing termination - return decode_terminated(data, enc, strict=False) - except ValueError: - # utf-16 termination with missing BOM, or single NULL - if not data[:len(term)].strip(b"\x00"): - return u"", data[len(term):] - - # utf-16 data with single NULL, see issue 169 - try: - return decode_terminated(data + b"\x00", enc) - except ValueError: - raise SpecError("Decoding error") - - def write(self, frame, value): - enc, term = self._encodings[frame.encoding] - return value.encode(enc) + term + return encode_endian(value, enc, le=True) + term + except UnicodeEncodeError as e: + raise SpecError(e) def validate(self, frame, value): return text_type(value) @@ -315,16 +498,16 @@ class EncodedTextSpec(Spec): class MultiSpec(Spec): def __init__(self, name, *specs, **kw): - super(MultiSpec, self).__init__(name) + super(MultiSpec, self).__init__(name, default=kw.get('default')) self.specs = specs self.sep = kw.get('sep') - def read(self, frame, data): + def read(self, header, frame, data): values = [] while data: record = [] for spec in self.specs: - value, data = spec.read(frame, data) + value, data = spec.read(header, frame, data) record.append(value) if len(self.specs) != 1: values.append(record) @@ -332,20 +515,18 @@ class MultiSpec(Spec): values.append(record[0]) return values, data - def write(self, frame, value): + def write(self, config, frame, value): data = [] if len(self.specs) == 1: for v in value: - data.append(self.specs[0].write(frame, v)) + data.append(self.specs[0].write(config, frame, v)) else: for record in value: for v, s in izip(record, self.specs): - data.append(s.write(frame, v)) + data.append(s.write(config, frame, v)) return b''.join(data) def validate(self, frame, value): - if 
class Latin1TextSpec(Spec):
    """A latin-1 encoded, NUL-terminated text value."""

    def __init__(self, name, default=u""):
        super(Latin1TextSpec, self).__init__(name, default)

    def read(self, header, frame, data):
        # Everything up to the first NUL is the value; the remainder of
        # the buffer is handed back for the next spec to consume. With
        # no NUL present the whole buffer is the value.
        value, _term, remainder = data.partition(b'\x00')
        return value.decode('latin1'), remainder

    def write(self, config, data, value):
        return value.encode('latin1') + b'\x00'

    def validate(self, frame, value):
        return text_type(value)
self._bspec.write(config, frame, len(value)) + for v in value: + b += self._lspec.write(config, frame, v) + return b + + def validate(self, frame, value): + return [self._lspec.validate(frame, v) for v in value] + + @swap_to_string @total_ordering class ID3TimeStamp(object): @@ -438,7 +685,7 @@ class ID3TimeStamp(object): pieces.append(self.__formats[i] % part + self.__seps[i]) return u''.join(pieces)[:-1] - def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')): + def set_text(self, text, splitre=re.compile('[-T:/.]|\\s+')): year, month, day, hour, minute, second = \ splitre.split(text + ':::::')[:6] for a in 'year month day hour minute second'.split(): @@ -460,7 +707,7 @@ class ID3TimeStamp(object): return repr(self.text) def __eq__(self, other): - return self.text == other.text + return isinstance(other, ID3TimeStamp) and self.text == other.text def __lt__(self, other): return self.text < other.text @@ -472,12 +719,12 @@ class ID3TimeStamp(object): class TimeStampSpec(EncodedTextSpec): - def read(self, frame, data): - value, data = super(TimeStampSpec, self).read(frame, data) + def read(self, header, frame, data): + value, data = super(TimeStampSpec, self).read(header, frame, data) return self.validate(frame, value), data - def write(self, frame, data): - return super(TimeStampSpec, self).write(frame, + def write(self, config, frame, data): + return super(TimeStampSpec, self).write(config, frame, data.text.replace(' ', 'T')) def validate(self, frame, value): @@ -493,12 +740,12 @@ class ChannelSpec(ByteSpec): class VolumeAdjustmentSpec(Spec): - def read(self, frame, data): + def read(self, header, frame, data): value, = unpack('>h', data[0:2]) return value / 512.0, data[2:] - def write(self, frame, value): - number = int(round(value * 512)) + def write(self, config, frame, value): + number = intround(value * 512) # pack only fails in 2.7, do it manually in 2.6 if not -32768 <= number <= 32767: raise SpecError("not in range") @@ -507,14 +754,14 @@ class 
class KeyEventSpec(Spec):
    """Spec for a list of (event type, timestamp) pairs — presumably
    as used by the ETCO frame; each event is 5 bytes on disk: a signed
    type byte followed by a 32-bit big-endian timestamp.
    """

    def read(self, header, frame, data):
        # Consume as many complete 5-byte records as available; any
        # trailing partial record is returned as leftover data.
        events = []
        while len(data) >= 5:
            events.append(struct.unpack(">bI", data[:5]))
            data = data[5:]
        return events, data

    def write(self, config, frame, value):
        return b"".join(struct.pack(">bI", *event) for event in value)

    def validate(self, frame, value):
        # Normalize any iterable of events to a plain list.
        return list(value)
+ +import struct + +from mutagen._tags import Tags +from mutagen._util import DictProxy, convert_error, read_full +from mutagen._compat import PY3, text_type, itervalues + +from ._util import BitPaddedInt, unsynch, ID3JunkFrameError, \ + ID3EncryptionUnsupportedError, is_valid_frame_id, error, \ + ID3NoHeaderError, ID3UnsupportedVersionError, ID3SaveConfig +from ._frames import TDRC, APIC, TDOR, TIME, TIPL, TORY, TDAT, Frames_2_2, \ + TextFrame, TYER, Frame, IPLS, Frames + + +class ID3Header(object): + + _V24 = (2, 4, 0) + _V23 = (2, 3, 0) + _V22 = (2, 2, 0) + _V11 = (1, 1) + + f_unsynch = property(lambda s: bool(s._flags & 0x80)) + f_extended = property(lambda s: bool(s._flags & 0x40)) + f_experimental = property(lambda s: bool(s._flags & 0x20)) + f_footer = property(lambda s: bool(s._flags & 0x10)) + + _known_frames = None + + @property + def known_frames(self): + if self._known_frames is not None: + return self._known_frames + elif self.version >= ID3Header._V23: + return Frames + elif self.version >= ID3Header._V22: + return Frames_2_2 + + @convert_error(IOError, error) + def __init__(self, fileobj=None): + """Raises ID3NoHeaderError, ID3UnsupportedVersionError or error""" + + if fileobj is None: + # for testing + self._flags = 0 + return + + fn = getattr(fileobj, "name", "<unknown>") + data = fileobj.read(10) + if len(data) != 10: + raise ID3NoHeaderError("%s: too small" % fn) + + id3, vmaj, vrev, flags, size = struct.unpack('>3sBBB4s', data) + self._flags = flags + self.size = BitPaddedInt(size) + 10 + self.version = (2, vmaj, vrev) + + if id3 != b'ID3': + raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn) + + if vmaj not in [2, 3, 4]: + raise ID3UnsupportedVersionError("%r ID3v2.%d not supported" + % (fn, vmaj)) + + if not BitPaddedInt.has_valid_padding(size): + raise error("Header size not synchsafe") + + if (self.version >= self._V24) and (flags & 0x0f): + raise error( + "%r has invalid flags %#02x" % (fn, flags)) + elif (self._V23 <= 
def determine_bpi(data, frames, EMPTY=b"\x00" * 10):
    """Takes id3v2.4 frame data and determines if ints or bitpaddedints
    should be used for parsing. Needed because iTunes used to write
    normal ints for frame sizes.

    Returns either the builtin ``int`` or ``BitPaddedInt`` — the type
    to apply to each frame's size field when parsing.
    """

    def count_frames(parse_size):
        """Walk the frame headers, interpreting each size field via
        *parse_size*. Returns (known frames found, distance the walk
        ended past/short of the end of the data)."""
        o = 0
        found = 0
        while o < len(data) - 10:
            part = data[o:o + 10]
            if part == EMPTY:
                # Hit padding: report how far into a header we stopped.
                return found, -((len(data) - o) % 10)
            name, size, flags = struct.unpack('>4sLH', part)
            o += 10 + parse_size(size)
            if PY3:
                try:
                    name = name.decode("ascii")
                except UnicodeDecodeError:
                    continue
            if name in frames:
                found += 1
        return found, o - len(data)

    # Try both interpretations of the size fields: as synchsafe
    # (BitPaddedInt, the v2.4 spec) and as plain big-endian ints
    # (what old iTunes wrote), then keep whichever matches more
    # known frame names.
    asbpi, bpioff = count_frames(lambda size: int(BitPaddedInt(size)))
    asint, intoff = count_frames(lambda size: size)

    # if more tags as int, or equal and bpi is past and int is not
    if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
        return int
    return BitPaddedInt
len(data), frame.HashKey) + + framedata = [d for (f, d) in sorted(framedata, key=sort_key)] + + # only write unknown frames if they were loaded from the version + # we are saving with. Theoretically we could upgrade frames + # but some frames can be nested like CHAP, so there is a chance + # we create a mixed frame mess. + if self._unknown_v2_version == config.v2_version: + framedata.extend(data for data in self.unknown_frames + if len(data) > 10) + + return bytearray().join(framedata) + + def getall(self, key): + """Return all frames with a given name (the list may be empty). + + Args: + key (text): key for frames to get + + This is best explained by examples:: + + id3.getall('TIT2') == [id3['TIT2']] + id3.getall('TTTT') == [] + id3.getall('TXXX') == [TXXX(desc='woo', text='bar'), + TXXX(desc='baz', text='quuuux'), ...] + + Since this is based on the frame's HashKey, which is + colon-separated, you can use it to do things like + ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``. + """ + if key in self: + return [self[key]] + else: + key = key + ":" + return [v for s, v in self.items() if s.startswith(key)] + + def setall(self, key, values): + """Delete frames of the given type and add frames in 'values'. + + Args: + key (text): key for frames to delete + values (List[`Frame`]): frames to add + """ + + self.delall(key) + for tag in values: + self[tag.HashKey] = tag + + def delall(self, key): + """Delete all tags of a given kind; see getall. + + Args: + key (text): key for frames to delete + """ + + if key in self: + del(self[key]) + else: + key = key + ":" + for k in list(self.keys()): + if k.startswith(key): + del(self[k]) + + def pprint(self): + """ + Returns: + text: tags in a human-readable format. + + "Human-readable" is used loosely here. The format is intended + to mirror that used for Vorbis or APEv2 output, e.g. 
    def _add(self, frame, strict):
        """Add a frame.

        Args:
            frame (Frame): the frame to add
            strict (bool): if this should raise in case it can't be added
                and frames shouldn't be merged.
        """

        if not isinstance(frame, Frame):
            raise TypeError("%r not a Frame instance" % frame)

        orig_frame = frame
        # Give the frame a chance to convert itself to its modern
        # equivalent; None means there is no upgrade path.
        frame = frame._upgrade_frame()
        if frame is None:
            if not strict:
                return
            raise TypeError(
                "Can't upgrade %r frame" % type(orig_frame).__name__)

        hash_key = frame.HashKey
        if strict or hash_key not in self:
            self[hash_key] = frame
            return

        # Try to merge frames, or change the new one. Since changing
        # the new one can lead to new conflicts, try until everything is
        # either merged or added.
        while True:
            old_frame = self[hash_key]
            new_frame = old_frame._merge_frame(frame)
            new_hash = new_frame.HashKey
            if new_hash == hash_key:
                # Merged into (or replaced) the existing frame.
                self[hash_key] = new_frame
                break
            else:
                # _merge_frame handed the new frame back with a changed
                # HashKey; retry insertion under that key.
                assert new_frame is frame
                if new_hash not in self:
                    self[new_hash] = new_frame
                    break
                hash_key = new_hash
+ self["TCON"].genres = self["TCON"].genres + + mimes = {"PNG": "image/png", "JPG": "image/jpeg"} + for pic in self.getall("APIC"): + if pic.mime in mimes: + newpic = APIC( + encoding=pic.encoding, mime=mimes[pic.mime], + type=pic.type, desc=pic.desc, data=pic.data) + self.add(newpic) + + def update_to_v24(self): + """Convert older tags into an ID3v2.4 tag. + + This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to + TDRC). If you intend to save tags, you must call this function + at some point; it is called by default when loading the tag. + """ + + self.__update_common() + + # TDAT, TYER, and TIME have been turned into TDRC. + try: + date = text_type(self.get("TYER", "")) + if date.strip(u"\x00"): + self.pop("TYER") + dat = text_type(self.get("TDAT", "")) + if dat.strip("\x00"): + self.pop("TDAT") + date = "%s-%s-%s" % (date, dat[2:], dat[:2]) + time = text_type(self.get("TIME", "")) + if time.strip("\x00"): + self.pop("TIME") + date += "T%s:%s:00" % (time[:2], time[2:]) + if "TDRC" not in self: + self.add(TDRC(encoding=0, text=date)) + except UnicodeDecodeError: + # Old ID3 tags have *lots* of Unicode problems, so if TYER + # is bad, just chuck the frames. + pass + + # TORY can be the first part of a TDOR. + if "TORY" in self: + f = self.pop("TORY") + if "TDOR" not in self: + try: + self.add(TDOR(encoding=0, text=str(f))) + except UnicodeDecodeError: + pass + + # IPLS is now TIPL. + if "IPLS" in self: + f = self.pop("IPLS") + if "TIPL" not in self: + self.add(TIPL(encoding=f.encoding, people=f.people)) + + # These can't be trivially translated to any ID3v2.4 tags, or + # should have been removed already. + for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME"]: + if key in self: + del(self[key]) + + # Recurse into chapters + for f in self.getall("CHAP"): + f.sub_frames.update_to_v24() + for f in self.getall("CTOC"): + f.sub_frames.update_to_v24() + + def update_to_v23(self): + """Convert older (and newer) tags into an ID3v2.3 tag. 
+ + This updates incompatible ID3v2 frames to ID3v2.3 ones. If you + intend to save tags as ID3v2.3, you must call this function + at some point. + + If you want to to go off spec and include some v2.4 frames + in v2.3, remove them before calling this and add them back afterwards. + """ + + self.__update_common() + + # TMCL, TIPL -> TIPL + if "TIPL" in self or "TMCL" in self: + people = [] + if "TIPL" in self: + f = self.pop("TIPL") + people.extend(f.people) + if "TMCL" in self: + f = self.pop("TMCL") + people.extend(f.people) + if "IPLS" not in self: + self.add(IPLS(encoding=f.encoding, people=people)) + + # TDOR -> TORY + if "TDOR" in self: + f = self.pop("TDOR") + if f.text: + d = f.text[0] + if d.year and "TORY" not in self: + self.add(TORY(encoding=f.encoding, text="%04d" % d.year)) + + # TDRC -> TYER, TDAT, TIME + if "TDRC" in self: + f = self.pop("TDRC") + if f.text: + d = f.text[0] + if d.year and "TYER" not in self: + self.add(TYER(encoding=f.encoding, text="%04d" % d.year)) + if d.month and d.day and "TDAT" not in self: + self.add(TDAT(encoding=f.encoding, + text="%02d%02d" % (d.day, d.month))) + if d.hour and d.minute and "TIME" not in self: + self.add(TIME(encoding=f.encoding, + text="%02d%02d" % (d.hour, d.minute))) + + # New frames added in v2.4 + v24_frames = [ + 'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR', + 'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO', + 'TSOA', 'TSOP', 'TSOT', 'TSST', + ] + + for key in v24_frames: + if key in self: + del(self[key]) + + # Recurse into chapters + for f in self.getall("CHAP"): + f.sub_frames.update_to_v23() + for f in self.getall("CTOC"): + f.sub_frames.update_to_v23() + + def _copy(self): + """Creates a shallow copy of all tags""" + + items = self.items() + subs = {} + for f in (self.getall("CHAP") + self.getall("CTOC")): + subs[f.HashKey] = f.sub_frames._copy() + return (items, subs) + + def _restore(self, value): + """Restores the state copied with _copy()""" + + items, subs = value + 
self.clear() + for key, value in items: + self[key] = value + if key in subs: + value.sub_frames._restore(subs[key]) + + +def save_frame(frame, name=None, config=None): + if config is None: + config = ID3SaveConfig() + + flags = 0 + if isinstance(frame, TextFrame): + if len(str(frame)) == 0: + return b'' + + framedata = frame._writeData(config) + + usize = len(framedata) + if usize > 2048: + # Disabled as this causes iTunes and other programs + # to fail to find these frames, which usually includes + # e.g. APIC. + # framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') + # flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN + pass + + if config.v2_version == 4: + bits = 7 + elif config.v2_version == 3: + bits = 8 + else: + raise ValueError + + datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits) + + if name is not None: + assert isinstance(name, bytes) + frame_name = name + else: + frame_name = type(frame).__name__ + if PY3: + frame_name = frame_name.encode("ascii") + + header = struct.pack('>4s4sH', frame_name, datasize, flags) + return header + framedata + + +def read_frames(id3, data, frames): + """Does not error out""" + + assert id3.version >= ID3Header._V22 + + result = [] + unsupported_frames = [] + + if id3.version < ID3Header._V24 and id3.f_unsynch: + try: + data = unsynch.decode(data) + except ValueError: + pass + + if id3.version >= ID3Header._V23: + if id3.version < ID3Header._V24: + bpi = int + else: + bpi = determine_bpi(data, frames) + + while data: + header = data[:10] + try: + name, size, flags = struct.unpack('>4sLH', header) + except struct.error: + break # not enough header + if name.strip(b'\x00') == b'': + break + + size = bpi(size) + framedata = data[10:10 + size] + data = data[10 + size:] + if size == 0: + continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + + try: + # someone writes 2.3 frames with 2.2 names + if name[-1] == "\x00": + tag = 
Frames_2_2[name[:-1]] + name = tag.__base__.__name__ + + tag = frames[name] + except KeyError: + if is_valid_frame_id(name): + unsupported_frames.append(header + framedata) + else: + try: + result.append(tag._fromData(id3, flags, framedata)) + except NotImplementedError: + unsupported_frames.append(header + framedata) + except ID3JunkFrameError: + pass + elif id3.version >= ID3Header._V22: + while data: + header = data[0:6] + try: + name, size = struct.unpack('>3s3s', header) + except struct.error: + break # not enough header + size, = struct.unpack('>L', b'\x00' + size) + if name.strip(b'\x00') == b'': + break + + framedata = data[6:6 + size] + data = data[6 + size:] + if size == 0: + continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + + try: + tag = frames[name] + except KeyError: + if is_valid_frame_id(name): + unsupported_frames.append(header + framedata) + else: + try: + result.append( + tag._fromData(id3, 0, framedata)) + except (ID3EncryptionUnsupportedError, + NotImplementedError): + unsupported_frames.append(header + framedata) + except ID3JunkFrameError: + pass + + return result, unsupported_frames, data diff --git a/libs/mutagen/id3/_util.py b/libs/mutagen/id3/_util.py index 29f7241d..93bb264e 100644 --- a/libs/mutagen/id3/_util.py +++ b/libs/mutagen/id3/_util.py @@ -1,15 +1,27 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # 2013 Christoph Reiter # 2014 Ben Ockmore # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
-from .._compat import long_, integer_types, PY3 -from .._util import MutagenError +from mutagen._compat import long_, integer_types, PY3 +from mutagen._util import MutagenError + + +def is_valid_frame_id(frame_id): + return frame_id.isalnum() and frame_id.isupper() + + +class ID3SaveConfig(object): + + def __init__(self, v2_version=4, v23_separator=None): + assert v2_version in (3, 4) + self.v2_version = v2_version + self.v23_separator = v23_separator class error(MutagenError): @@ -28,7 +40,7 @@ class ID3EncryptionUnsupportedError(error, NotImplementedError): pass -class ID3JunkFrameError(error, ValueError): +class ID3JunkFrameError(error): pass @@ -122,6 +134,8 @@ class BitPaddedInt(int, _BitPaddedMixin): shift = 0 if isinstance(value, integer_types): + if value < 0: + raise ValueError while value: numeric_value += (value & mask) << shift value >>= 8 diff --git a/libs/mutagen/m4a.py b/libs/mutagen/m4a.py index 3ed148c5..c7583f8e 100644 --- a/libs/mutagen/m4a.py +++ b/libs/mutagen/m4a.py @@ -2,8 +2,9 @@ # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """ since 1.9: mutagen.m4a is deprecated; use mutagen.mp4 instead. 
@@ -14,14 +15,14 @@ since 1.31: mutagen.m4a will no longer work; any operation that could fail import warnings from mutagen import FileType, Tags, StreamInfo -from ._util import DictProxy, MutagenError +from ._util import DictProxy, MutagenError, loadfile warnings.warn( "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning) -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -33,7 +34,7 @@ class M4AStreamInfoError(error): pass -class M4AMetadataValueError(ValueError, M4AMetadataError): +class M4AMetadataValueError(error): pass @@ -83,7 +84,8 @@ class M4A(FileType): _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] - def load(self, filename): + @loadfile() + def load(self, filething): raise error("deprecated") def add_tags(self): diff --git a/libs/mutagen/monkeysaudio.py b/libs/mutagen/monkeysaudio.py index 0e29273f..82bfcd24 100644 --- a/libs/mutagen/monkeysaudio.py +++ b/libs/mutagen/monkeysaudio.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Monkey's Audio streams with APEv2 tags. @@ -21,7 +21,7 @@ import struct from ._compat import endswith from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen._util import cdata +from mutagen._util import cdata, convert_error class MonkeysAudioHeaderError(error): @@ -29,18 +29,22 @@ class MonkeysAudioHeaderError(error): class MonkeysAudioInfo(StreamInfo): - """Monkey's Audio stream information. + """MonkeysAudioInfo() + + Monkey's Audio stream information. 
Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bits_per_sample -- bits per sample - * version -- Monkey's Audio stream version, as a float (eg: 3.99) + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): bits per sample + version (`float`): Monkey's Audio stream version, as a float (eg: 3.99) """ + @convert_error(IOError, MonkeysAudioHeaderError) def __init__(self, fileobj): + """Raises MonkeysAudioHeaderError""" + header = fileobj.read(76) if len(header) != 76 or not header.startswith(b"MAC "): raise MonkeysAudioHeaderError("not a Monkey's Audio file") @@ -62,6 +66,9 @@ class MonkeysAudioInfo(StreamInfo): blocks_per_frame = 73728 else: blocks_per_frame = 9216 + self.bits_per_sample = 0 + if header[48:].startswith(b"WAVEfmt"): + self.bits_per_sample = struct.unpack("<H", header[74:76])[0] self.version /= 1000.0 self.length = 0.0 if (self.sample_rate != 0) and (total_frames > 0): @@ -75,6 +82,15 @@ class MonkeysAudioInfo(StreamInfo): class MonkeysAudio(APEv2File): + """MonkeysAudio(filething) + + Arguments: + filething (filething) + + Attributes: + info (`MonkeysAudioInfo`) + """ + _Info = MonkeysAudioInfo _mimes = ["audio/ape", "audio/x-ape"] diff --git a/libs/mutagen/mp3.py b/libs/mutagen/mp3.py deleted file mode 100644 index afb600cf..00000000 --- a/libs/mutagen/mp3.py +++ /dev/null @@ -1,362 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright (C) 2006 Joe Wreschnig -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. 
- -"""MPEG audio stream information and tags.""" - -import os -import struct - -from ._compat import endswith, xrange -from ._mp3util import XingHeader, XingHeaderError, VBRIHeader, VBRIHeaderError -from mutagen import StreamInfo -from mutagen._util import MutagenError, enum -from mutagen.id3 import ID3FileType, BitPaddedInt, delete - -__all__ = ["MP3", "Open", "delete", "MP3"] - - -class error(RuntimeError, MutagenError): - pass - - -class HeaderNotFoundError(error, IOError): - pass - - -class InvalidMPEGHeader(error, IOError): - pass - - -@enum -class BitrateMode(object): - - UNKNOWN = 0 - """Probably a CBR file, but not sure""" - - CBR = 1 - """Constant Bitrate""" - - VBR = 2 - """Variable Bitrate""" - - ABR = 3 - """Average Bitrate (a variant of VBR)""" - - -def _guess_xing_bitrate_mode(xing): - - if xing.lame_header: - lame = xing.lame_header - if lame.vbr_method in (1, 8): - return BitrateMode.CBR - elif lame.vbr_method in (2, 9): - return BitrateMode.ABR - elif lame.vbr_method in (3, 4, 5, 6): - return BitrateMode.VBR - # everything else undefined, continue guessing - - # info tags get only written by lame for cbr files - if xing.is_info: - return BitrateMode.CBR - - # older lame and non-lame with some variant of vbr - if xing.vbr_scale != -1 or xing.lame_version: - return BitrateMode.VBR - - return BitrateMode.UNKNOWN - - -# Mode values. -STEREO, JOINTSTEREO, DUALCHANNEL, MONO = xrange(4) - - -class MPEGInfo(StreamInfo): - """MPEG audio stream information - - Parse information about an MPEG audio file. This also reads the - Xing VBR header format. - - This code was implemented based on the format documentation at - http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm. - - Useful attributes: - - * length -- audio length, in seconds - * channels -- number of audio channels - * bitrate -- audio bitrate, in bits per second - * sketchy -- if true, the file may not be valid MPEG audio - * encoder_info -- a string containing encoder name and possibly version. 
- In case a lame tag is present this will start with - ``"LAME "``, if unknown it is empty, otherwise the - text format is undefined. - * bitrate_mode -- a :class:`BitrateMode` - - * track_gain -- replaygain track gain (89db) or None - * track_peak -- replaygain track peak or None - * album_gain -- replaygain album gain (89db) or None - - Useless attributes: - - * version -- MPEG version (1, 2, 2.5) - * layer -- 1, 2, or 3 - * mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3) - * protected -- whether or not the file is "protected" - * padding -- whether or not audio frames are padded - * sample_rate -- audio sample rate, in Hz - """ - - # Map (version, layer) tuples to bitrates. - __BITRATE = { - (1, 1): [0, 32, 64, 96, 128, 160, 192, 224, - 256, 288, 320, 352, 384, 416, 448], - (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128, - 160, 192, 224, 256, 320, 384], - (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112, - 128, 160, 192, 224, 256, 320], - (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128, - 144, 160, 176, 192, 224, 256], - (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, - 80, 96, 112, 128, 144, 160], - } - - __BITRATE[(2, 3)] = __BITRATE[(2, 2)] - for i in xrange(1, 4): - __BITRATE[(2.5, i)] = __BITRATE[(2, i)] - - # Map version to sample rates. - __RATES = { - 1: [44100, 48000, 32000], - 2: [22050, 24000, 16000], - 2.5: [11025, 12000, 8000] - } - - sketchy = False - encoder_info = u"" - bitrate_mode = BitrateMode.UNKNOWN - track_gain = track_peak = album_gain = album_peak = None - - def __init__(self, fileobj, offset=None): - """Parse MPEG stream information from a file-like object. - - If an offset argument is given, it is used to start looking - for stream information and Xing headers; otherwise, ID3v2 tags - will be skipped automatically. A correct offset can make - loading files significantly faster. 
- """ - - try: - size = os.path.getsize(fileobj.name) - except (IOError, OSError, AttributeError): - fileobj.seek(0, 2) - size = fileobj.tell() - - # If we don't get an offset, try to skip an ID3v2 tag. - if offset is None: - fileobj.seek(0, 0) - idata = fileobj.read(10) - try: - id3, insize = struct.unpack('>3sxxx4s', idata) - except struct.error: - id3, insize = b'', 0 - insize = BitPaddedInt(insize) - if id3 == b'ID3' and insize > 0: - offset = insize + 10 - else: - offset = 0 - - # Try to find two valid headers (meaning, very likely MPEG data) - # at the given offset, 30% through the file, 60% through the file, - # and 90% through the file. - for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]: - try: - self.__try(fileobj, int(i), size - offset) - except error: - pass - else: - break - # If we can't find any two consecutive frames, try to find just - # one frame back at the original offset given. - else: - self.__try(fileobj, offset, size - offset, False) - self.sketchy = True - - def __try(self, fileobj, offset, real_size, check_second=True): - # This is going to be one really long function; bear with it, - # because there's not really a sane point to cut it up. - fileobj.seek(offset, 0) - - # We "know" we have an MPEG file if we find two frames that look like - # valid MPEG data. If we can't find them in 32k of reads, something - # is horribly wrong (the longest frame can only be about 4k). This - # is assuming the offset didn't lie. 
- data = fileobj.read(32768) - - frame_1 = data.find(b"\xff") - while 0 <= frame_1 <= (len(data) - 4): - frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0] - if ((frame_data >> 16) & 0xE0) != 0xE0: - frame_1 = data.find(b"\xff", frame_1 + 2) - else: - version = (frame_data >> 19) & 0x3 - layer = (frame_data >> 17) & 0x3 - protection = (frame_data >> 16) & 0x1 - bitrate = (frame_data >> 12) & 0xF - sample_rate = (frame_data >> 10) & 0x3 - padding = (frame_data >> 9) & 0x1 - # private = (frame_data >> 8) & 0x1 - self.mode = (frame_data >> 6) & 0x3 - # mode_extension = (frame_data >> 4) & 0x3 - # copyright = (frame_data >> 3) & 0x1 - # original = (frame_data >> 2) & 0x1 - # emphasis = (frame_data >> 0) & 0x3 - if (version == 1 or layer == 0 or sample_rate == 0x3 or - bitrate == 0 or bitrate == 0xF): - frame_1 = data.find(b"\xff", frame_1 + 2) - else: - break - else: - raise HeaderNotFoundError("can't sync to an MPEG frame") - - self.channels = 1 if self.mode == MONO else 2 - - # There is a serious problem here, which is that many flags - # in an MPEG header are backwards. 
- self.version = [2.5, None, 2, 1][version] - self.layer = 4 - layer - self.protected = not protection - self.padding = bool(padding) - - self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate] - self.bitrate *= 1000 - self.sample_rate = self.__RATES[self.version][sample_rate] - - if self.layer == 1: - frame_length = ( - (12 * self.bitrate // self.sample_rate) + padding) * 4 - frame_size = 384 - elif self.version >= 2 and self.layer == 3: - frame_length = (72 * self.bitrate // self.sample_rate) + padding - frame_size = 576 - else: - frame_length = (144 * self.bitrate // self.sample_rate) + padding - frame_size = 1152 - - if check_second: - possible = int(frame_1 + frame_length) - if possible > len(data) + 4: - raise HeaderNotFoundError("can't sync to second MPEG frame") - try: - frame_data = struct.unpack( - ">H", data[possible:possible + 2])[0] - except struct.error: - raise HeaderNotFoundError("can't sync to second MPEG frame") - if (frame_data & 0xFFE0) != 0xFFE0: - raise HeaderNotFoundError("can't sync to second MPEG frame") - - self.length = 8 * real_size / float(self.bitrate) - - # Try to find/parse the Xing header, which trumps the above length - # and bitrate calculation. 
- - if self.layer != 3: - return - - # Xing - xing_offset = XingHeader.get_offset(self) - fileobj.seek(offset + frame_1 + xing_offset, 0) - try: - xing = XingHeader(fileobj) - except XingHeaderError: - pass - else: - lame = xing.lame_header - self.sketchy = False - self.bitrate_mode = _guess_xing_bitrate_mode(xing) - if xing.frames != -1: - samples = frame_size * xing.frames - if lame is not None: - samples -= lame.encoder_delay_start - samples -= lame.encoder_padding_end - self.length = float(samples) / self.sample_rate - if xing.bytes != -1 and self.length: - self.bitrate = int((xing.bytes * 8) / self.length) - if xing.lame_version: - self.encoder_info = u"LAME %s" % xing.lame_version - if lame is not None: - self.track_gain = lame.track_gain_adjustment - self.track_peak = lame.track_peak - self.album_gain = lame.album_gain_adjustment - return - - # VBRI - vbri_offset = VBRIHeader.get_offset(self) - fileobj.seek(offset + frame_1 + vbri_offset, 0) - try: - vbri = VBRIHeader(fileobj) - except VBRIHeaderError: - pass - else: - self.bitrate_mode = BitrateMode.VBR - self.encoder_info = u"FhG" - self.sketchy = False - self.length = float(frame_size * vbri.frames) / self.sample_rate - if self.length: - self.bitrate = int((vbri.bytes * 8) / self.length) - - def pprint(self): - info = str(self.bitrate_mode).split(".", 1)[-1] - if self.bitrate_mode == BitrateMode.UNKNOWN: - info = u"CBR?" - if self.encoder_info: - info += ", %s" % self.encoder_info - s = u"MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( - self.version, self.layer, self.bitrate, info, - self.sample_rate, self.channels, self.length) - if self.sketchy: - s += u" (sketchy)" - return s - - -class MP3(ID3FileType): - """An MPEG audio (usually MPEG-1 Layer 3) file. 
- - :ivar info: :class:`MPEGInfo` - :ivar tags: :class:`ID3 <mutagen.id3.ID3>` - """ - - _Info = MPEGInfo - - _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"] - - @property - def mime(self): - l = self.info.layer - return ["audio/mp%d" % l, "audio/x-mp%d" % l] + super(MP3, self).mime - - @staticmethod - def score(filename, fileobj, header_data): - filename = filename.lower() - - return (header_data.startswith(b"ID3") * 2 + - endswith(filename, b".mp3") + - endswith(filename, b".mp2") + endswith(filename, b".mpg") + - endswith(filename, b".mpeg")) - - -Open = MP3 - - -class EasyMP3(MP3): - """Like MP3, but uses EasyID3 for tags. - - :ivar info: :class:`MPEGInfo` - :ivar tags: :class:`EasyID3 <mutagen.easyid3.EasyID3>` - """ - - from mutagen.easyid3 import EasyID3 as ID3 - ID3 = ID3 diff --git a/libs/mutagen/mp3/__init__.py b/libs/mutagen/mp3/__init__.py new file mode 100644 index 00000000..8ce70e35 --- /dev/null +++ b/libs/mutagen/mp3/__init__.py @@ -0,0 +1,483 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""MPEG audio stream information and tags.""" + +import struct + +from mutagen import StreamInfo +from mutagen._util import MutagenError, enum, BitReader, BitReaderError, \ + convert_error, intround +from mutagen._compat import endswith, xrange +from mutagen.id3 import ID3FileType, delete +from mutagen.id3._util import BitPaddedInt + +from ._util import XingHeader, XingHeaderError, VBRIHeader, VBRIHeaderError + + +__all__ = ["MP3", "Open", "delete", "MP3"] + + +class error(MutagenError): + pass + + +class HeaderNotFoundError(error): + pass + + +class InvalidMPEGHeader(error): + pass + + +@enum +class BitrateMode(object): + + UNKNOWN = 0 + """Probably a CBR file, but not sure""" + + CBR = 1 + """Constant Bitrate""" + + VBR = 2 + """Variable Bitrate""" + + ABR = 3 + """Average Bitrate (a variant of VBR)""" + + +def _guess_xing_bitrate_mode(xing): + + if xing.lame_header: + lame = xing.lame_header + if lame.vbr_method in (1, 8): + return BitrateMode.CBR + elif lame.vbr_method in (2, 9): + return BitrateMode.ABR + elif lame.vbr_method in (3, 4, 5, 6): + return BitrateMode.VBR + # everything else undefined, continue guessing + + # info tags get only written by lame for cbr files + if xing.is_info: + return BitrateMode.CBR + + # older lame and non-lame with some variant of vbr + if xing.vbr_scale != -1 or xing.lame_version_desc: + return BitrateMode.VBR + + return BitrateMode.UNKNOWN + + +# Mode values. +STEREO, JOINTSTEREO, DUALCHANNEL, MONO = xrange(4) + + +class MPEGFrame(object): + + # Map (version, layer) tuples to bitrates. 
+ __BITRATE = { + (1, 1): [0, 32, 64, 96, 128, 160, 192, 224, + 256, 288, 320, 352, 384, 416, 448], + (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384], + (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112, + 128, 160, 192, 224, 256, 320], + (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128, + 144, 160, 176, 192, 224, 256], + (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 144, 160], + } + + __BITRATE[(2, 3)] = __BITRATE[(2, 2)] + for i in xrange(1, 4): + __BITRATE[(2.5, i)] = __BITRATE[(2, i)] + + # Map version to sample rates. + __RATES = { + 1: [44100, 48000, 32000], + 2: [22050, 24000, 16000], + 2.5: [11025, 12000, 8000] + } + + sketchy = False + + def __init__(self, fileobj): + """Raises HeaderNotFoundError""" + + self.frame_offset = fileobj.tell() + + r = BitReader(fileobj) + try: + if r.bits(11) != 0x7ff: + raise HeaderNotFoundError("invalid sync") + version = r.bits(2) + layer = r.bits(2) + protection = r.bits(1) + bitrate = r.bits(4) + sample_rate = r.bits(2) + padding = r.bits(1) + r.skip(1) # private + self.mode = r.bits(2) + r.skip(6) + except BitReaderError: + raise HeaderNotFoundError("truncated header") + + assert r.get_position() == 32 and r.is_aligned() + + # try to be strict here to redice the chance of a false positive + if version == 1 or layer == 0 or sample_rate == 0x3 or \ + bitrate == 0xf or bitrate == 0: + raise HeaderNotFoundError("invalid header") + + self.channels = 1 if self.mode == MONO else 2 + + self.version = [2.5, None, 2, 1][version] + self.layer = 4 - layer + self.protected = not protection + self.padding = bool(padding) + + self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate] + self.bitrate *= 1000 + self.sample_rate = self.__RATES[self.version][sample_rate] + + if self.layer == 1: + frame_size = 384 + slot = 4 + elif self.version >= 2 and self.layer == 3: + frame_size = 576 + slot = 1 + else: + frame_size = 1152 + slot = 1 + + frame_length = ( + ((frame_size // 8 * self.bitrate) // 
self.sample_rate) + + padding) * slot + + self.sketchy = True + + # Try to find/parse the Xing header, which trumps the above length + # and bitrate calculation. + if self.layer == 3: + self._parse_vbr_header(fileobj, self.frame_offset, frame_size, + frame_length) + + fileobj.seek(self.frame_offset + frame_length, 0) + + def _parse_vbr_header(self, fileobj, frame_offset, frame_size, + frame_length): + """Does not raise""" + + # Xing + xing_offset = XingHeader.get_offset(self) + fileobj.seek(frame_offset + xing_offset, 0) + try: + xing = XingHeader(fileobj) + except XingHeaderError: + pass + else: + lame = xing.lame_header + self.sketchy = False + self.bitrate_mode = _guess_xing_bitrate_mode(xing) + self.encoder_settings = xing.get_encoder_settings() + if xing.frames != -1: + samples = frame_size * xing.frames + if xing.bytes != -1 and samples > 0: + # the first frame is only included in xing.bytes but + # not in xing.frames, skip it. + audio_bytes = max(0, xing.bytes - frame_length) + self.bitrate = intround(( + audio_bytes * 8 * self.sample_rate) / float(samples)) + if lame is not None: + samples -= lame.encoder_delay_start + samples -= lame.encoder_padding_end + if samples < 0: + # older lame versions wrote bogus delay/padding for short + # files with low bitrate + samples = 0 + self.length = float(samples) / self.sample_rate + if xing.lame_version_desc: + self.encoder_info = u"LAME %s" % xing.lame_version_desc + if lame is not None: + self.track_gain = lame.track_gain_adjustment + self.track_peak = lame.track_peak + self.album_gain = lame.album_gain_adjustment + return + + # VBRI + vbri_offset = VBRIHeader.get_offset(self) + fileobj.seek(frame_offset + vbri_offset, 0) + try: + vbri = VBRIHeader(fileobj) + except VBRIHeaderError: + pass + else: + self.bitrate_mode = BitrateMode.VBR + self.encoder_info = u"FhG" + self.sketchy = False + self.length = float(frame_size * vbri.frames) / self.sample_rate + if self.length: + self.bitrate = int((vbri.bytes * 8) / 
self.length) + + +def skip_id3(fileobj): + """Might raise IOError""" + + # WMP writes multiple id3s, so skip as many as we find + while True: + idata = fileobj.read(10) + try: + id3, insize = struct.unpack('>3sxxx4s', idata) + except struct.error: + id3, insize = b'', 0 + insize = BitPaddedInt(insize) + if id3 == b'ID3' and insize > 0: + fileobj.seek(insize, 1) + else: + fileobj.seek(-len(idata), 1) + break + + +def iter_sync(fileobj, max_read): + """Iterate over a fileobj and yields on each mpeg sync. + + When yielding the fileobj offset is right before the sync and can be + changed between iterations without affecting the iteration process. + + Might raise IOError. + """ + + read = 0 + size = 2 + last_byte = b"" + is_second = lambda b: ord(b) & 0xe0 == 0xe0 + + while read < max_read: + data_offset = fileobj.tell() + new_data = fileobj.read(min(max_read - read, size)) + if not new_data: + return + read += len(new_data) + + if last_byte == b"\xff" and is_second(new_data[0:1]): + fileobj.seek(data_offset - 1, 0) + yield + + size *= 2 + last_byte = new_data[-1:] + + find_offset = 0 + while True: + index = new_data.find(b"\xff", find_offset) + # if not found or the last byte -> read more + if index == -1 or index == len(new_data) - 1: + break + + if is_second(new_data[index + 1:index + 2]): + fileobj.seek(data_offset + index, 0) + yield + find_offset = index + 1 + + fileobj.seek(data_offset + len(new_data), 0) + + +class MPEGInfo(StreamInfo): + """MPEGInfo() + + MPEG audio stream information + + Parse information about an MPEG audio file. This also reads the + Xing VBR header format. + + This code was implemented based on the format documentation at + http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm. + + Useful attributes: + + Attributes: + length (`float`): audio length, in seconds + channels (`int`): number of audio channels + bitrate (`int`): audio bitrate, in bits per second. 
+ In case :attr:`bitrate_mode` is :attr:`BitrateMode.UNKNOWN` the + bitrate is guessed based on the first frame. + sample_rate (`int`) audio sample rate, in Hz + encoder_info (`mutagen.text`): a string containing encoder name and + possibly version. In case a lame tag is present this will start + with ``"LAME "``, if unknown it is empty, otherwise the + text format is undefined. + encoder_settings (`mutagen.text`): a string containing a guess about + the settings used for encoding. The format is undefined and + depends on the encoder. + bitrate_mode (`BitrateMode`): a :class:`BitrateMode` + track_gain (`float` or `None`): replaygain track gain (89db) or None + track_peak (`float` or `None`): replaygain track peak or None + album_gain (`float` or `None`): replaygain album gain (89db) or None + + Useless attributes: + + Attributes: + version (`float`): MPEG version (1, 2, 2.5) + layer (`int`): 1, 2, or 3 + mode (`int`): One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3) + protected (`bool`): whether or not the file is "protected" + sketchy (`bool`): if true, the file may not be valid MPEG audio + """ + + sketchy = False + encoder_info = u"" + encoder_settings = u"" + bitrate_mode = BitrateMode.UNKNOWN + track_gain = track_peak = album_gain = album_peak = None + + @convert_error(IOError, error) + def __init__(self, fileobj, offset=None): + """Parse MPEG stream information from a file-like object. + + If an offset argument is given, it is used to start looking + for stream information and Xing headers; otherwise, ID3v2 tags + will be skipped automatically. A correct offset can make + loading files significantly faster. 
+ + Raises HeaderNotFoundError, error + """ + + if offset is None: + fileobj.seek(0, 0) + else: + fileobj.seek(offset, 0) + + # skip anyway, because wmp stacks multiple id3 tags + skip_id3(fileobj) + + # find a sync in the first 1024K, give up after some invalid syncs + max_read = 1024 * 1024 + max_syncs = 1000 + enough_frames = 4 + min_frames = 2 + + self.sketchy = True + frames = [] + first_frame = None + + for _ in iter_sync(fileobj, max_read): + max_syncs -= 1 + if max_syncs <= 0: + break + + for _ in xrange(enough_frames): + try: + frame = MPEGFrame(fileobj) + except HeaderNotFoundError: + break + frames.append(frame) + if not frame.sketchy: + break + + # if we have min frames, save it in case this is all we get + if len(frames) >= min_frames and first_frame is None: + first_frame = frames[0] + + # if the last frame was a non-sketchy one (has a valid vbr header) + # we use that + if frames and not frames[-1].sketchy: + first_frame = frames[-1] + self.sketchy = False + break + + # if we have enough valid frames, use the first + if len(frames) >= enough_frames: + first_frame = frames[0] + self.sketchy = False + break + + # otherwise start over with the next sync + del frames[:] + + if first_frame is None: + raise HeaderNotFoundError("can't sync to MPEG frame") + + assert first_frame + + self.length = -1 + sketchy = self.sketchy + self.__dict__.update(first_frame.__dict__) + self.sketchy = sketchy + + # no length, estimate based on file size + if self.length == -1: + fileobj.seek(0, 2) + content_size = fileobj.tell() - first_frame.frame_offset + self.length = 8 * content_size / float(self.bitrate) + + def pprint(self): + info = str(self.bitrate_mode).split(".", 1)[-1] + if self.bitrate_mode == BitrateMode.UNKNOWN: + info = u"CBR?" 
+ if self.encoder_info: + info += ", %s" % self.encoder_info + if self.encoder_settings: + info += ", %s" % self.encoder_settings + s = u"MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( + self.version, self.layer, self.bitrate, info, + self.sample_rate, self.channels, self.length) + if self.sketchy: + s += u" (sketchy)" + return s + + +class MP3(ID3FileType): + """MP3(filething) + + An MPEG audio (usually MPEG-1 Layer 3) file. + + Arguments: + filething (filething) + + Attributes: + info (`MPEGInfo`) + tags (`mutagen.id3.ID3`) + """ + + _Info = MPEGInfo + + _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"] + + @property + def mime(self): + l = self.info.layer + return ["audio/mp%d" % l, "audio/x-mp%d" % l] + super(MP3, self).mime + + @staticmethod + def score(filename, fileobj, header_data): + filename = filename.lower() + + return (header_data.startswith(b"ID3") * 2 + + endswith(filename, b".mp3") + + endswith(filename, b".mp2") + endswith(filename, b".mpg") + + endswith(filename, b".mpeg")) + + +Open = MP3 + + +class EasyMP3(MP3): + """EasyMP3(filething) + + Like MP3, but uses EasyID3 for tags. + + Arguments: + filething (filething) + + Attributes: + info (`MPEGInfo`) + tags (`mutagen.easyid3.EasyID3`) + """ + + from mutagen.easyid3 import EasyID3 as ID3 + ID3 = ID3 diff --git a/libs/mutagen/_mp3util.py b/libs/mutagen/mp3/_util.py similarity index 69% rename from libs/mutagen/_mp3util.py rename to libs/mutagen/mp3/_util.py index 409cadcb..fd1b5ca3 100644 --- a/libs/mutagen/_mp3util.py +++ b/libs/mutagen/mp3/_util.py @@ -2,18 +2,20 @@ # Copyright 2015 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
""" http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header http://wiki.hydrogenaud.io/index.php?title=MP3 """ +from __future__ import division from functools import partial -from ._util import cdata, BitReader -from ._compat import xrange, iterbytes, cBytesIO +from mutagen._util import cdata, BitReader +from mutagen._compat import xrange, iterbytes, cBytesIO class LAMEError(Exception): @@ -36,7 +38,9 @@ class LAMEHeader(object): """VBR quality: 0..9""" track_peak = None - """Peak signal amplitude as float. None if unknown.""" + """Peak signal amplitude as float. 1.0 is maximal signal amplitude + in decoded format. None if unknown. + """ track_gain_origin = 0 """see the docs""" @@ -122,8 +126,7 @@ class LAMEHeader(object): self.track_peak = None else: # see PutLameVBR() in LAME's VbrTag.c - self.track_peak = ( - cdata.uint32_be(track_peak_data) - 0.5) / 2 ** 23 + self.track_peak = cdata.uint32_be(track_peak_data) / 2 ** 23 track_gain_type = r.bits(3) self.track_gain_origin = r.bits(3) sign = r.bits(1) @@ -173,6 +176,97 @@ class LAMEHeader(object): self.header_crc = r.bits(16) assert r.is_aligned() + def guess_settings(self, major, minor): + """Gives a guess about the encoder settings used. Returns an empty + string if unknown. + + The guess is mostly correct in case the file was encoded with + the default options (-V --preset --alt-preset --abr -b etc) and no + other fancy options. 
+ + Args: + major (int) + minor (int) + Returns: + text + """ + + version = major, minor + + if self.vbr_method == 2: + if version in ((3, 90), (3, 91), (3, 92)) and self.encoding_flags: + if self.bitrate < 255: + return u"--alt-preset %d" % self.bitrate + else: + return u"--alt-preset %d+" % self.bitrate + if self.preset_used != 0: + return u"--preset %d" % self.preset_used + elif self.bitrate < 255: + return u"--abr %d" % self.bitrate + else: + return u"--abr %d+" % self.bitrate + elif self.vbr_method == 1: + if self.preset_used == 0: + if self.bitrate < 255: + return u"-b %d" % self.bitrate + else: + return u"-b 255+" + elif self.preset_used == 1003: + return u"--preset insane" + return u"-b %d" % self.preset_used + elif version in ((3, 90), (3, 91), (3, 92)): + preset_key = (self.vbr_quality, self.quality, self.vbr_method, + self.lowpass_filter, self.ath_type) + if preset_key == (1, 2, 4, 19500, 3): + return u"--preset r3mix" + if preset_key == (2, 2, 3, 19000, 4): + return u"--alt-preset standard" + if preset_key == (2, 2, 3, 19500, 2): + return u"--alt-preset extreme" + + if self.vbr_method == 3: + return u"-V %s" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s --vbr-new" % self.vbr_quality + elif version in ((3, 93), (3, 94), (3, 95), (3, 96), (3, 97)): + if self.preset_used == 1001: + return u"--preset standard" + elif self.preset_used == 1002: + return u"--preset extreme" + elif self.preset_used == 1004: + return u"--preset fast standard" + elif self.preset_used == 1005: + return u"--preset fast extreme" + elif self.preset_used == 1006: + return u"--preset medium" + elif self.preset_used == 1007: + return u"--preset fast medium" + + if self.vbr_method == 3: + return u"-V %s" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s --vbr-new" % self.vbr_quality + elif version == (3, 98): + if self.vbr_method == 3: + return u"-V %s --vbr-old" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s" % 
self.vbr_quality + elif version >= (3, 99): + if self.vbr_method == 3: + return u"-V %s --vbr-old" % self.vbr_quality + elif self.vbr_method in (4, 5): + p = self.vbr_quality + adjust_key = (p, self.bitrate, self.lowpass_filter) + # https://sourceforge.net/p/lame/bugs/455/ + p = { + (5, 32, 0): 7, + (5, 8, 0): 8, + (6, 8, 0): 9, + }.get(adjust_key, p) + return u"-V %s" % p + + return u"" + @classmethod def parse_version(cls, fileobj): """Returns a version string and True if a LAMEHeader follows. @@ -211,9 +305,9 @@ class LAMEHeader(object): if (major, minor) < (3, 90) or ( (major, minor) == (3, 90) and data[-11:-10] == b"("): flag = data.strip(b"\x00").rstrip().decode("ascii") - return u"%d.%d%s" % (major, minor, flag), False + return (major, minor), u"%d.%d%s" % (major, minor, flag), False - if len(data) <= 11: + if len(data) < 11: raise LAMEError("Invalid version: too long") flag = data[:-11].rstrip(b"\x00") @@ -239,7 +333,8 @@ class LAMEHeader(object): # extended header, seek back to 9 bytes for the caller fileobj.seek(-11, 1) - return u"%d.%d%s%s" % (major, minor, patch, flag_string), True + return (major, minor), \ + u"%d.%d%s%s" % (major, minor, patch, flag_string), True class XingHeaderError(Exception): @@ -273,7 +368,10 @@ class XingHeader(object): lame_header = None """A LAMEHeader instance or None""" - lame_version = u"" + lame_version = (0, 0) + """The LAME version as two element tuple (major, minor)""" + + lame_version_desc = u"" """The version of the LAME encoder e.g. '3.99.0'. 
Empty if unknown""" is_info = False @@ -318,12 +416,20 @@ class XingHeader(object): self.vbr_scale = cdata.uint32_be(data) try: - self.lame_version, has_header = LAMEHeader.parse_version(fileobj) + self.lame_version, self.lame_version_desc, has_header = \ + LAMEHeader.parse_version(fileobj) if has_header: self.lame_header = LAMEHeader(self, fileobj) except LAMEError: pass + def get_encoder_settings(self): + """Returns the guessed encoder settings""" + + if self.lame_header is None: + return u"" + return self.lame_header.guess_settings(*self.lame_version) + @classmethod def get_offset(cls, info): """Calculate the offset to the Xing header from the start of the diff --git a/libs/mutagen/mp4/__init__.py b/libs/mutagen/mp4/__init__.py index e3c16a7f..75ec5769 100644 --- a/libs/mutagen/mp4/__init__.py +++ b/libs/mutagen/mp4/__init__.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write MPEG-4 audio files with iTunes metadata. 
@@ -28,8 +28,8 @@ import sys from mutagen import FileType, Tags, StreamInfo, PaddingInfo from mutagen._constants import GENRES -from mutagen._util import (cdata, insert_bytes, DictProxy, MutagenError, - hashable, enum, get_size, resize_bytes) +from mutagen._util import cdata, insert_bytes, DictProxy, MutagenError, \ + hashable, enum, get_size, resize_bytes, loadfile, convert_error from mutagen._compat import (reraise, PY2, string_types, text_type, chr_, iteritems, PY3, cBytesIO, izip, xrange) from ._atom import Atoms, Atom, AtomError @@ -37,7 +37,7 @@ from ._util import parse_full_atom from ._as_entry import AudioSampleEntry, ASEntryError -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -49,6 +49,10 @@ class MP4StreamInfoError(error): pass +class MP4NoTrackError(MP4StreamInfoError): + pass + + class MP4MetadataValueError(ValueError, MP4MetadataError): pass @@ -58,7 +62,7 @@ __all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm', 'AtomDataType'] @enum class AtomDataType(object): - """Enum for `dataformat` attribute of MP4FreeForm. + """Enum for ``dataformat`` attribute of MP4FreeForm. .. versionadded:: 1.25 """ @@ -132,8 +136,8 @@ class MP4Cover(bytes): """A cover artwork. Attributes: - - * imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) + imageformat (`AtomDataType`): format of the image + (either FORMAT_JPEG or FORMAT_PNG) """ FORMAT_JPEG = AtomDataType.JPEG @@ -168,8 +172,7 @@ class MP4FreeForm(bytes): """A freeform value. Attributes: - - * dataformat -- format of the data (see AtomDataType) + dataformat (`AtomDataType`): format of the data (see AtomDataType) """ FORMAT_DATA = AtomDataType.IMPLICIT # deprecated @@ -201,7 +204,6 @@ class MP4FreeForm(bytes): AtomDataType(self.dataformat)) - def _name2key(name): if PY2: return name @@ -253,7 +255,9 @@ def _item_sort_key(key, value): class MP4Tags(DictProxy, Tags): - r"""Dictionary containing Apple iTunes metadata list key/values. 
+ r"""MP4Tags() + + Dictionary containing Apple iTunes metadata list key/values. Keys are four byte identifiers, except for freeform ('----') keys. Values are usually unicode strings, but some atoms have a @@ -286,6 +290,8 @@ class MP4Tags(DictProxy, Tags): * 'soco' -- composer sort order * 'sosn' -- show sort order * 'tvsh' -- show name + * '\\xa9wrk' -- work + * '\\xa9mvn' -- movement Boolean values: @@ -298,9 +304,21 @@ class MP4Tags(DictProxy, Tags): * 'trkn' -- track number, total tracks * 'disk' -- disc number, total discs + Integer values: + + * 'tmpo' -- tempo/BPM + * '\\xa9mvc' -- Movement Count + * '\\xa9mvi' -- Movement Index + * 'shwm' -- work/movement + * 'stik' -- Media Kind + * 'rtng' -- Content Rating + * 'tves' -- TV Episode + * 'tvsn' -- TV Season + * 'plID', 'cnID', 'geID', 'atID', 'sfID', 'cmID', 'akID' -- Various iTunes + Internal IDs + Others: - * 'tmpo' -- tempo/BPM, 16 bit int * 'covr' -- cover artwork, list of MP4Cover objects (which are tagged strs) * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead. @@ -365,13 +383,16 @@ class MP4Tags(DictProxy, Tags): atom_name = _key2name(key)[:4] if atom_name in self.__atoms: render_func = self.__atoms[atom_name][1] + render_args = self.__atoms[atom_name][2:] else: render_func = type(self).__render_text + render_args = [] - return render_func(self, key, value) + return render_func(self, key, value, *render_args) - def save(self, filename, padding=None): - """Save the metadata to the given filename.""" + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, padding=None): values = [] items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv)) @@ -395,13 +416,12 @@ class MP4Tags(DictProxy, Tags): data = Atom.render(b"ilst", b"".join(values)) # Find the old atoms. 
- with open(filename, "rb+") as fileobj: - try: - atoms = Atoms(fileobj) - except AtomError as err: - reraise(error, err, sys.exc_info()[2]) + try: + atoms = Atoms(filething.fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) - self.__save(fileobj, atoms, data, padding) + self.__save(filething.fileobj, atoms, data, padding) def __save(self, fileobj, atoms, data, padding): try: @@ -660,30 +680,60 @@ class MP4Tags(DictProxy, Tags): key = _name2key(b"\xa9gen") self.__add(key, values) - def __parse_tempo(self, atom, data): + def __parse_integer(self, atom, data): values = [] for version, flags, data in self.__parse_data(atom, data): - # version = 0, flags = 0 or 21 - if len(data) != 2: - raise MP4MetadataValueError("invalid tempo") - values.append(cdata.ushort_be(data)) + if version != 0: + raise MP4MetadataValueError("unsupported version") + if flags not in (AtomDataType.IMPLICIT, AtomDataType.INTEGER): + raise MP4MetadataValueError("unsupported type") + + if len(data) == 1: + value = cdata.int8(data) + elif len(data) == 2: + value = cdata.int16_be(data) + elif len(data) == 3: + value = cdata.int32_be(data + b"\x00") >> 8 + elif len(data) == 4: + value = cdata.int32_be(data) + elif len(data) == 8: + value = cdata.int64_be(data) + else: + raise MP4MetadataValueError( + "invalid value size %d" % len(data)) + values.append(value) + key = _name2key(atom.name) self.__add(key, values) - def __render_tempo(self, key, value): + def __render_integer(self, key, value, min_bytes): + assert min_bytes in (1, 2, 4, 8) + + data_list = [] try: - if len(value) == 0: - return self.__render_data(key, 0, AtomDataType.INTEGER, b"") + for v in value: + # We default to the int size of the usual values written + # by itunes for compatibility. 
+ if cdata.int8_min <= v <= cdata.int8_max and min_bytes <= 1: + data = cdata.to_int8(v) + elif cdata.int16_min <= v <= cdata.int16_max and \ + min_bytes <= 2: + data = cdata.to_int16_be(v) + elif cdata.int32_min <= v <= cdata.int32_max and \ + min_bytes <= 4: + data = cdata.to_int32_be(v) + elif cdata.int64_min <= v <= cdata.int64_max and \ + min_bytes <= 8: + data = cdata.to_int64_be(v) + else: + raise MP4MetadataValueError( + "value out of range: %r" % value) + data_list.append(data) - if (min(value) < 0) or (max(value) >= 2 ** 16): - raise MP4MetadataValueError( - "invalid 16 bit integers: %r" % value) - except TypeError: - raise MP4MetadataValueError( - "tmpo must be a list of 16 bit integers") + except (TypeError, ValueError, cdata.error) as e: + raise MP4MetadataValueError(e) - values = [cdata.to_ushort_be(v) for v in value] - return self.__render_data(key, 0, AtomDataType.INTEGER, values) + return self.__render_data(key, 0, AtomDataType.INTEGER, data_list) def __parse_bool(self, atom, data): for version, flags, data in self.__parse_data(atom, data): @@ -785,10 +835,24 @@ class MP4Tags(DictProxy, Tags): b"trkn": (__parse_pair, __render_pair), b"disk": (__parse_pair, __render_pair_no_trailing), b"gnre": (__parse_genre, None), - b"tmpo": (__parse_tempo, __render_tempo), + b"plID": (__parse_integer, __render_integer, 8), + b"cnID": (__parse_integer, __render_integer, 4), + b"geID": (__parse_integer, __render_integer, 4), + b"atID": (__parse_integer, __render_integer, 4), + b"sfID": (__parse_integer, __render_integer, 4), + b"cmID": (__parse_integer, __render_integer, 4), + b"akID": (__parse_integer, __render_integer, 1), + b"tvsn": (__parse_integer, __render_integer, 4), + b"tves": (__parse_integer, __render_integer, 4), + b"tmpo": (__parse_integer, __render_integer, 2), + b"\xa9mvi": (__parse_integer, __render_integer, 2), + b"\xa9mvc": (__parse_integer, __render_integer, 2), b"cpil": (__parse_bool, __render_bool), b"pgap": (__parse_bool, __render_bool), 
b"pcst": (__parse_bool, __render_bool), + b"shwm": (__parse_integer, __render_integer, 1), + b"stik": (__parse_integer, __render_integer, 1), + b"rtng": (__parse_integer, __render_integer, 1), b"covr": (__parse_cover, __render_cover), b"purl": (__parse_text, __render_text), b"egid": (__parse_text, __render_text), @@ -826,35 +890,43 @@ class MP4Tags(DictProxy, Tags): class MP4Info(StreamInfo): - """MPEG-4 stream information. + """MP4Info() + + MPEG-4 stream information. Attributes: + bitrate (`int`): bitrate in bits per second, as an int + length (`float`): file length in seconds, as a float + channels (`int`): number of audio channels + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): bits per sample + codec (`mutagen.text`): + * if starting with ``"mp4a"`` uses an mp4a audio codec + (see the codec parameter in rfc6381 for details e.g. + ``"mp4a.40.2"``) + * for everything else see a list of possible values at + http://www.mp4ra.org/codecs.html - * bitrate -- bitrate in bits per second, as an int - * length -- file length in seconds, as a float - * channels -- number of audio channels - * sample_rate -- audio sampling rate in Hz - * bits_per_sample -- bits per sample - * codec (string): - * if starting with ``"mp4a"`` uses an mp4a audio codec - (see the codec parameter in rfc6381 for details e.g. ``"mp4a.40.2"``) - * for everything else see a list of possible values at - http://www.mp4ra.org/codecs.html - - e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc. - * codec_description (string): - Name of the codec used (ALAC, AAC LC, AC-3...). Values might change in - the future, use for display purposes only. + e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc. + codec_description (`mutagen.text`): + Name of the codec used (ALAC, AAC LC, AC-3...). Values might + change in the future, use for display purposes only. 
""" bitrate = 0 + length = 0.0 channels = 0 sample_rate = 0 bits_per_sample = 0 codec = u"" - codec_name = u"" + codec_description = u"" - def __init__(self, atoms, fileobj): + def __init__(self, *args, **kwargs): + if args or kwargs: + self.load(*args, **kwargs) + + @convert_error(IOError, MP4StreamInfoError) + def load(self, atoms, fileobj): try: moov = atoms[b"moov"] except KeyError: @@ -868,7 +940,7 @@ class MP4Info(StreamInfo): if data[8:12] == b"soun": break else: - raise MP4StreamInfoError("track has no audio data") + raise MP4NoTrackError("track has no audio data") mdhd = trak[b"mdia", b"mdhd"] ok, data = mdhd.read(fileobj) @@ -956,52 +1028,65 @@ class MP4Info(StreamInfo): class MP4(FileType): - """An MPEG-4 audio file, probably containing AAC. + """MP4(filething) + + An MPEG-4 audio file, probably containing AAC. If more than one track is present in the file, the first is used. Only audio ('soun') tracks will be read. - :ivar info: :class:`MP4Info` - :ivar tags: :class:`MP4Tags` + Arguments: + filething (filething) + + Attributes: + info (`MP4Info`) + tags (`MP4Tags`) """ MP4Tags = MP4Tags _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] - def load(self, filename): - self.filename = filename - with open(filename, "rb") as fileobj: - try: - atoms = Atoms(fileobj) - except AtomError as err: - reraise(error, err, sys.exc_info()[2]) + @loadfile() + def load(self, filething): + fileobj = filething.fileobj + try: + atoms = Atoms(fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) + + self.info = MP4Info() + try: + self.info.load(atoms, fileobj) + except MP4NoTrackError: + pass + except error: + raise + except Exception as err: + reraise(MP4StreamInfoError, err, sys.exc_info()[2]) + + if not MP4Tags._can_load(atoms): + self.tags = None + else: try: - self.info = MP4Info(atoms, fileobj) + self.tags = self.MP4Tags(atoms, fileobj) except error: raise except Exception as err: - reraise(MP4StreamInfoError, err, 
sys.exc_info()[2]) + reraise(MP4MetadataError, err, sys.exc_info()[2]) - if not MP4Tags._can_load(atoms): - self.tags = None - self._padding = 0 - else: - try: - self.tags = self.MP4Tags(atoms, fileobj) - except error: - raise - except Exception as err: - reraise(MP4MetadataError, err, sys.exc_info()[2]) - else: - self._padding = self.tags._padding + @property + def _padding(self): + if self.tags is None: + return 0 + else: + return self.tags._padding - def save(self, filename=None, padding=None): - super(MP4, self).save(filename, padding=padding) + def save(self, *args, **kwargs): + """save(filething=None, padding=None)""" - def delete(self, filename=None): - super(MP4, self).delete(filename) + super(MP4, self).save(*args, **kwargs) def add_tags(self): if self.tags is None: @@ -1017,7 +1102,19 @@ class MP4(FileType): Open = MP4 -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - MP4(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = MP4(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/mp4/_as_entry.py b/libs/mutagen/mp4/_as_entry.py index 306d5720..15b7e6bc 100644 --- a/libs/mutagen/mp4/_as_entry.py +++ b/libs/mutagen/mp4/_as_entry.py @@ -2,8 +2,9 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
from mutagen._compat import cBytesIO, xrange from mutagen.aac import ProgramConfigElement diff --git a/libs/mutagen/mp4/_atom.py b/libs/mutagen/mp4/_atom.py index f73eb556..cd43a1fe 100644 --- a/libs/mutagen/mp4/_atom.py +++ b/libs/mutagen/mp4/_atom.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct from mutagen._compat import PY2 +from mutagen._util import convert_error # This is not an exhaustive list of container atoms, but just the # ones this module needs to peek inside. @@ -36,6 +37,7 @@ class Atom(object): children = None + @convert_error(IOError, AtomError) def __init__(self, fileobj, level=0): """May raise AtomError""" @@ -142,6 +144,7 @@ class Atoms(object): This structure should only be used internally by Mutagen. """ + @convert_error(IOError, AtomError) def __init__(self, fileobj): self.atoms = [] fileobj.seek(0, 2) diff --git a/libs/mutagen/mp4/_util.py b/libs/mutagen/mp4/_util.py index 9583334a..43d81c82 100644 --- a/libs/mutagen/mp4/_util.py +++ b/libs/mutagen/mp4/_util.py @@ -2,8 +2,9 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
from mutagen._util import cdata diff --git a/libs/mutagen/musepack.py b/libs/mutagen/musepack.py index 7880958b..c966d939 100644 --- a/libs/mutagen/musepack.py +++ b/libs/mutagen/musepack.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # Copyright (C) 2012 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Musepack audio streams with APEv2 tags. @@ -22,8 +22,8 @@ import struct from ._compat import endswith, xrange from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen.id3 import BitPaddedInt -from mutagen._util import cdata +from mutagen.id3._util import BitPaddedInt +from mutagen._util import cdata, convert_error, intround class MusepackHeaderError(error): @@ -67,20 +67,24 @@ def _calc_sv8_peak(peak): class MusepackInfo(StreamInfo): - """Musepack stream information. + """MusepackInfo() + + Musepack stream information. 
Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bitrate -- audio bitrate, in bits per second - * version -- Musepack stream version + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bitrate (`int`): audio bitrate, in bits per second + version (`int`) Musepack stream version Optional Attributes: - * title_gain, title_peak -- Replay Gain and peak data for this song - * album_gain, album_peak -- Replay Gain and peak data for this album + Attributes: + title_gain (`float`): Replay Gain for this song + title_peak (`float`): Peak data for this song + album_gain (`float`): Replay Gain for this album + album_peak (`float`): Peak data for this album These attributes are only available in stream version 7/8. The gains are a float, +/- some dB. The peaks are a percentage [0..1] of @@ -88,7 +92,10 @@ class MusepackInfo(StreamInfo): VorbisGain, you must multiply the peak by 2. 
""" + @convert_error(IOError, MusepackHeaderError) def __init__(self, fileobj): + """Raises MusepackHeaderError""" + header = fileobj.read(4) if len(header) != 4: raise MusepackHeaderError("not a Musepack file") @@ -111,7 +118,7 @@ class MusepackInfo(StreamInfo): if not self.bitrate and self.length != 0: fileobj.seek(0, 2) - self.bitrate = int(round(fileobj.tell() * 8 / self.length)) + self.bitrate = intround(fileobj.tell() * 8 / self.length) def __parse_sv8(self, fileobj): # SV8 http://trac.musepack.net/trac/wiki/SV8Specification @@ -161,7 +168,7 @@ class MusepackInfo(StreamInfo): try: self.version = bytearray(fileobj.read(1))[0] - except TypeError: + except (TypeError, IndexError): raise MusepackHeaderError("SH packet ended unexpectedly.") remaining_size -= 1 @@ -256,6 +263,15 @@ class MusepackInfo(StreamInfo): class Musepack(APEv2File): + """Musepack(filething) + + Arguments: + filething (filething) + + Attributes: + info (`MusepackInfo`) + """ + _Info = MusepackInfo _mimes = ["audio/x-musepack", "audio/x-mpc"] diff --git a/libs/mutagen/ogg.py b/libs/mutagen/ogg.py index 9961a966..22d7442c 100644 --- a/libs/mutagen/ogg.py +++ b/libs/mutagen/ogg.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg bitstreams and pages. 
@@ -21,11 +21,11 @@ import sys import zlib from mutagen import FileType -from mutagen._util import cdata, resize_bytes, MutagenError +from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, seek_end from ._compat import cBytesIO, reraise, chr_, izip, xrange -class error(IOError, MutagenError): +class error(MutagenError): """Ogg stream parsing errors.""" pass @@ -42,14 +42,15 @@ class OggPage(object): to the start of the next page. Attributes: - - * version -- stream structure version (currently always 0) - * position -- absolute stream position (default -1) - * serial -- logical stream serial number (default 0) - * sequence -- page sequence number within logical stream (default 0) - * offset -- offset this page was read from (default None) - * complete -- if the last packet on this page is complete (default True) - * packets -- list of raw packet data (default []) + version (`int`): stream structure version (currently always 0) + position (`int`): absolute stream position (default -1) + serial (`int`): logical stream serial number (default 0) + sequence (`int`): page sequence number within logical stream + (default 0) + offset (`int` or `None`): offset this page was read from (default None) + complete (`bool`): if the last packet on this page is complete + (default True) + packets (List[`bytes`]): list of raw packet data (default []) Note that if 'complete' is false, the next page's 'continued' property must be true (so set both when constructing pages). @@ -67,6 +68,8 @@ class OggPage(object): complete = True def __init__(self, fileobj=None): + """Raises error, IOError, EOFError""" + self.packets = [] if fileobj is None: @@ -431,7 +434,7 @@ class OggPage(object): cls.renumber(fileobj, serial, sequence) @staticmethod - def find_last(fileobj, serial): + def find_last(fileobj, serial, finishing=False): """Find the last page of the stream 'serial'. If the file is not multiplexed this function is fast. 
If it is, @@ -439,27 +442,36 @@ class OggPage(object): This finds the last page in the actual file object, or the last page in the stream (with eos set), whichever comes first. + + If finishing is True it returns the last page which contains a packet + finishing on it. If there exist pages but none with finishing packets + returns None. + + Returns None in case no page with the serial exists. + Raises error in case this isn't a valid ogg stream. + Raises IOError. """ # For non-muxed streams, look at the last page. - try: - fileobj.seek(-256 * 256, 2) - except IOError: - # The file is less than 64k in length. - fileobj.seek(0) + seek_end(fileobj, 256 * 256) + data = fileobj.read() try: index = data.rindex(b"OggS") except ValueError: raise error("unable to find final Ogg header") bytesobj = cBytesIO(data[index:]) + + def is_valid(page): + return not finishing or page.position != -1 + best_page = None try: page = OggPage(bytesobj) except error: pass else: - if page.serial == serial: + if page.serial == serial and is_valid(page): if page.last: return page else: @@ -471,12 +483,14 @@ class OggPage(object): fileobj.seek(0) try: page = OggPage(fileobj) - while not page.last: + while True: + if page.serial == serial: + if is_valid(page): + best_page = page + if page.last: + break page = OggPage(fileobj) - while page.serial != serial: - page = OggPage(fileobj) - best_page = page - return page + return best_page except error: return best_page except EOFError: @@ -484,65 +498,92 @@ class OggPage(object): class OggFileType(FileType): - """An generic Ogg file.""" + """OggFileType(filething) + + An generic Ogg file. 
+ + Arguments: + filething (filething) + """ _Info = None _Tags = None _Error = None _mimes = ["application/ogg", "application/x-ogg"] - def load(self, filename): - """Load file information from a filename.""" + @loadfile() + def load(self, filething): + """load(filething) - self.filename = filename - with open(filename, "rb") as fileobj: - try: - self.info = self._Info(fileobj) - self.tags = self._Tags(fileobj, self.info) - self.info._post_tags(fileobj) - except error as e: - reraise(self._Error, e, sys.exc_info()[2]) - except EOFError: - raise self._Error("no appropriate stream found") + Load file information from a filename. - def delete(self, filename=None): - """Remove tags from a file. - - If no filename is given, the one most recently loaded is used. + Args: + filething (filething) + Raises: + mutagen.MutagenError """ - if filename is None: - filename = self.filename + fileobj = filething.fileobj + + try: + self.info = self._Info(fileobj) + self.tags = self._Tags(fileobj, self.info) + self.info._post_tags(fileobj) + except (error, IOError) as e: + reraise(self._Error, e, sys.exc_info()[2]) + except EOFError: + raise self._Error("no appropriate stream found") + + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. + + If no filename is given, the one most recently loaded is used. + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + fileobj = filething.fileobj self.tags.clear() # TODO: we should delegate the deletion to the subclass and not through # _inject. - with open(filename, "rb+") as fileobj: + try: try: self.tags._inject(fileobj, lambda x: 0) except error as e: reraise(self._Error, e, sys.exc_info()[2]) except EOFError: raise self._Error("no appropriate stream found") + except IOError as e: + reraise(self._Error, e, sys.exc_info()[2]) def add_tags(self): raise self._Error - def save(self, filename=None, padding=None): - """Save a tag to a file. 
+ @loadfile(writable=True) + def save(self, filething, padding=None): + """save(filething=None, padding=None) + + Save a tag to a file. If no filename is given, the one most recently loaded is used. + + Args: + filething (filething) + padding (:obj:`mutagen.PaddingFunction`) + Raises: + mutagen.MutagenError """ - if filename is None: - filename = self.filename - fileobj = open(filename, "rb+") try: - try: - self.tags._inject(fileobj, padding) - except error as e: - reraise(self._Error, e, sys.exc_info()[2]) - except EOFError: - raise self._Error("no appropriate stream found") - finally: - fileobj.close() + self.tags._inject(filething.fileobj, padding) + except (IOError, error) as e: + reraise(self._Error, e, sys.exc_info()[2]) + except EOFError: + raise self._Error("no appropriate stream found") diff --git a/libs/mutagen/oggflac.py b/libs/mutagen/oggflac.py index b86226ca..bc073094 100644 --- a/libs/mutagen/oggflac.py +++ b/libs/mutagen/oggflac.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg FLAC comments. @@ -24,6 +24,7 @@ from ._compat import cBytesIO from mutagen import StreamInfo from mutagen.flac import StreamInfo as FLACStreamInfo, error as FLACError from mutagen._vorbis import VCommentDict +from mutagen._util import loadfile, convert_error from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -36,16 +37,19 @@ class OggFLACHeaderError(error): class OggFLACStreamInfo(StreamInfo): - """Ogg FLAC stream info.""" + """OggFLACStreamInfo() + + Ogg FLAC stream info. 
+ + Attributes: + length (`float`): File length in seconds, as a float + channels (`float`): Number of channels + sample_rate (`int`): Sample rate in Hz" + """ length = 0 - """File length in seconds, as a float""" - channels = 0 - """Number of channels""" - sample_rate = 0 - """Sample rate in Hz""" def __init__(self, fileobj): page = OggPage(fileobj) @@ -75,7 +79,9 @@ class OggFLACStreamInfo(StreamInfo): def _post_tags(self, fileobj): if self.length: return - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggFLACHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): @@ -130,7 +136,17 @@ class OggFLACVComment(VCommentDict): class OggFLAC(OggFileType): - """An Ogg FLAC file.""" + """OggFLAC(filething) + + An Ogg FLAC file. + + Arguments: + filething (filething) + + Attributes: + info (`OggFLACStreamInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggFLACStreamInfo _Tags = OggFLACVComment @@ -138,13 +154,7 @@ class OggFLAC(OggFileType): _mimes = ["audio/x-oggflac"] info = None - """A `OggFLACStreamInfo`""" - tags = None - """A `VCommentDict`""" - - def save(self, filename=None): - return super(OggFLAC, self).save(filename) @staticmethod def score(filename, fileobj, header): @@ -155,7 +165,19 @@ class OggFLAC(OggFileType): Open = OggFLAC -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggFLAC(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ + + t = OggFLAC(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/oggopus.py b/libs/mutagen/oggopus.py index 7154e479..df9c32e8 100644 --- a/libs/mutagen/oggopus.py +++ b/libs/mutagen/oggopus.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2012, 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Opus comments. @@ -20,7 +20,7 @@ import struct from mutagen import StreamInfo from mutagen._compat import BytesIO -from mutagen._util import get_size +from mutagen._util import get_size, loadfile, convert_error from mutagen._tags import PaddingInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -35,13 +35,17 @@ class OggOpusHeaderError(error): class OggOpusInfo(StreamInfo): - """Ogg Opus stream information.""" + """OggOpusInfo() + + Ogg Opus stream information. 
+ + Attributes: + length (`float`): File length in seconds, as a float + channels (`int`): Number of channels + """ length = 0 - """File length in seconds, as a float""" - channels = 0 - """Number of channels""" def __init__(self, fileobj): page = OggPage(fileobj) @@ -65,7 +69,9 @@ class OggOpusInfo(StreamInfo): raise OggOpusHeaderError("version %r unsupported" % major) def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggOpusHeaderError self.length = (page.position - self.__pre_skip) / float(48000) def pprint(self): @@ -131,7 +137,18 @@ class OggOpusVComment(VCommentDict): class OggOpus(OggFileType): - """An Ogg Opus file.""" + """OggOpus(filething) + + An Ogg Opus file. + + Arguments: + filething (filething) + + Attributes: + info (`OggOpusInfo`) + tags (`mutagen._vorbis.VCommentDict`) + + """ _Info = OggOpusInfo _Tags = OggOpusVComment @@ -139,10 +156,7 @@ class OggOpus(OggFileType): _mimes = ["audio/ogg", "audio/ogg; codecs=opus"] info = None - """A `OggOpusInfo`""" - tags = None - """A `VCommentDict`""" @staticmethod def score(filename, fileobj, header): @@ -152,7 +166,19 @@ class OggOpus(OggFileType): Open = OggOpus -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggOpus(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ + + t = OggOpus(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/oggspeex.py b/libs/mutagen/oggspeex.py index 9b16930b..de02a449 100644 --- a/libs/mutagen/oggspeex.py +++ b/libs/mutagen/oggspeex.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Speex comments. @@ -22,7 +22,7 @@ __all__ = ["OggSpeex", "Open", "delete"] from mutagen import StreamInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError -from mutagen._util import cdata, get_size +from mutagen._util import cdata, get_size, loadfile, convert_error from mutagen._tags import PaddingInfo @@ -35,20 +35,21 @@ class OggSpeexHeaderError(error): class OggSpeexInfo(StreamInfo): - """Ogg Speex stream information.""" + """OggSpeexInfo() + + Ogg Speex stream information. + + Attributes: + length (`float`): file length in seconds, as a float + channels (`int`): number of channels + bitrate (`int`): nominal bitrate in bits per second. The reference + encoder does not set the bitrate; in this case, the bitrate will + be 0. + """ length = 0 - """file length in seconds, as a float""" - channels = 0 - """number of channels""" - bitrate = 0 - """nominal bitrate in bits per second. - - The reference encoder does not set the bitrate; in this case, - the bitrate will be 0. 
- """ def __init__(self, fileobj): page = OggPage(fileobj) @@ -63,7 +64,9 @@ class OggSpeexInfo(StreamInfo): self.serial = page.serial def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggSpeexHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): @@ -127,7 +130,17 @@ class OggSpeexVComment(VCommentDict): class OggSpeex(OggFileType): - """An Ogg Speex file.""" + """OggSpeex(filething) + + An Ogg Speex file. + + Arguments: + filething (filething) + + Attributes: + info (`OggSpeexInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggSpeexInfo _Tags = OggSpeexVComment @@ -135,10 +148,7 @@ class OggSpeex(OggFileType): _mimes = ["audio/x-speex"] info = None - """A `OggSpeexInfo`""" - tags = None - """A `VCommentDict`""" @staticmethod def score(filename, fileobj, header): @@ -148,7 +158,19 @@ class OggSpeex(OggFileType): Open = OggSpeex -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggSpeex(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggSpeex(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/oggtheora.py b/libs/mutagen/oggtheora.py index 122e7d4b..a18dfd53 100644 --- a/libs/mutagen/oggtheora.py +++ b/libs/mutagen/oggtheora.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
"""Read and write Ogg Theora comments. @@ -20,7 +20,7 @@ import struct from mutagen import StreamInfo from mutagen._vorbis import VCommentDict -from mutagen._util import cdata, get_size +from mutagen._util import cdata, get_size, loadfile, convert_error from mutagen._tags import PaddingInfo from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -34,16 +34,19 @@ class OggTheoraHeaderError(error): class OggTheoraInfo(StreamInfo): - """Ogg Theora stream information.""" + """OggTheoraInfo() + + Ogg Theora stream information. + + Attributes: + length (`float`): File length in seconds, as a float + fps (`float`): Video frames per second, as a float + bitrate (`int`): Bitrate in bps (int) + """ length = 0 - """File length in seconds, as a float""" - fps = 0 - """Video frames per second, as a float""" - bitrate = 0 - """Bitrate in bps (int)""" def __init__(self, fileobj): page = OggPage(fileobj) @@ -64,7 +67,9 @@ class OggTheoraInfo(StreamInfo): self.serial = page.serial def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggTheoraHeaderError position = page.position mask = (1 << self.granule_shift) - 1 frames = (position >> self.granule_shift) + (position & mask) @@ -120,7 +125,17 @@ class OggTheoraCommentDict(VCommentDict): class OggTheora(OggFileType): - """An Ogg Theora file.""" + """OggTheora(filething) + + An Ogg Theora file. 
+ + Arguments: + filething (filething) + + Attributes: + info (`OggTheoraInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggTheoraInfo _Tags = OggTheoraCommentDict @@ -128,10 +143,7 @@ class OggTheora(OggFileType): _mimes = ["video/x-theora"] info = None - """A `OggTheoraInfo`""" - tags = None - """A `VCommentDict`""" @staticmethod def score(filename, fileobj, header): @@ -142,7 +154,19 @@ class OggTheora(OggFileType): Open = OggTheora -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggTheora(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggTheora(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/oggvorbis.py b/libs/mutagen/oggvorbis.py index b058a0c1..e3292ae3 100644 --- a/libs/mutagen/oggvorbis.py +++ b/libs/mutagen/oggvorbis.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Vorbis comments. 
@@ -21,7 +21,7 @@ import struct from mutagen import StreamInfo from mutagen._vorbis import VCommentDict -from mutagen._util import get_size +from mutagen._util import get_size, loadfile, convert_error from mutagen._tags import PaddingInfo from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -35,21 +35,26 @@ class OggVorbisHeaderError(error): class OggVorbisInfo(StreamInfo): - """Ogg Vorbis stream information.""" + """OggVorbisInfo() - length = 0 - """File length in seconds, as a float""" + Ogg Vorbis stream information. + Attributes: + length (`float`): File length in seconds, as a float + channels (`int`): Number of channels + bitrate (`int`): Nominal ('average') bitrate in bits per second + sample_Rate (`int`): Sample rate in Hz + + """ + + length = 0.0 channels = 0 - """Number of channels""" - bitrate = 0 - """Nominal ('average') bitrate in bits per second, as an int""" - sample_rate = 0 - """Sample rate in Hz""" def __init__(self, fileobj): + """Raises ogg.error, IOError""" + page = OggPage(fileobj) while not page.packets[0].startswith(b"\x01vorbis"): page = OggPage(fileobj) @@ -76,7 +81,11 @@ class OggVorbisInfo(StreamInfo): self.bitrate = nominal_bitrate def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + """Raises ogg.error""" + + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggVorbisHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): @@ -132,7 +141,17 @@ class OggVCommentDict(VCommentDict): class OggVorbis(OggFileType): - """An Ogg Vorbis file.""" + """OggVorbis(filething) + + Arguments: + filething (filething) + + An Ogg Vorbis file. 
+ + Attributes: + info (`OggVorbisInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggVorbisInfo _Tags = OggVCommentDict @@ -140,10 +159,7 @@ class OggVorbis(OggFileType): _mimes = ["audio/vorbis", "audio/x-vorbis"] info = None - """A `OggVorbisInfo`""" - tags = None - """A `VCommentDict`""" @staticmethod def score(filename, fileobj, header): @@ -153,7 +169,19 @@ class OggVorbis(OggFileType): Open = OggVorbis -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggVorbis(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggVorbis(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/libs/mutagen/optimfrog.py b/libs/mutagen/optimfrog.py index 0d85a818..830224d6 100644 --- a/libs/mutagen/optimfrog.py +++ b/libs/mutagen/optimfrog.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """OptimFROG audio streams with APEv2 tags. @@ -23,6 +23,7 @@ __all__ = ["OptimFROG", "Open", "delete"] import struct from ._compat import endswith +from ._util import convert_error from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete @@ -32,16 +33,20 @@ class OptimFROGHeaderError(error): class OptimFROGInfo(StreamInfo): - """OptimFROG stream information. + """OptimFROGInfo() + + OptimFROG stream information. 
Attributes: - - * channels - number of audio channels - * length - file length in seconds, as a float - * sample_rate - audio sampling rate in Hz + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz """ + @convert_error(IOError, OptimFROGHeaderError) def __init__(self, fileobj): + """Raises OptimFROGHeaderError""" + header = fileobj.read(76) if (len(header) != 76 or not header.startswith(b"OFR ") or struct.unpack("<I", header[4:8])[0] not in [12, 15]): @@ -62,6 +67,13 @@ class OptimFROGInfo(StreamInfo): class OptimFROG(APEv2File): + """OptimFROG(filething) + + Attributes: + info (`OptimFROGInfo`) + tags (`mutagen.apev2.APEv2`) + """ + _Info = OptimFROGInfo @staticmethod diff --git a/libs/mutagen/smf.py b/libs/mutagen/smf.py index 2731cf3a..f41d8142 100644 --- a/libs/mutagen/smf.py +++ b/libs/mutagen/smf.py @@ -2,8 +2,9 @@ # Copyright 2015 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
"""Standard MIDI File (SMF)""" @@ -11,6 +12,7 @@ import struct from mutagen import StreamInfo, MutagenError from mutagen._file import FileType +from mutagen._util import loadfile from mutagen._compat import xrange, endswith @@ -163,27 +165,38 @@ def _read_midi_length(fileobj): class SMFInfo(StreamInfo): + """SMFInfo() + + Attributes: + length (`float`): Length in seconds + + """ def __init__(self, fileobj): """Raises SMFError""" self.length = _read_midi_length(fileobj) - """Length in seconds""" def pprint(self): return u"SMF, %.2f seconds" % self.length class SMF(FileType): - """Standard MIDI File (SMF)""" + """SMF(filething) + + Standard MIDI File (SMF) + + Attributes: + info (`SMFInfo`) + tags: `None` + """ _mimes = ["audio/midi", "audio/x-midi"] - def load(self, filename): - self.filename = filename + @loadfile() + def load(self, filething): try: - with open(filename, "rb") as h: - self.info = SMFInfo(h) + self.info = SMFInfo(filething.fileobj) except IOError as e: raise SMFError(e) diff --git a/libs/mutagen/trueaudio.py b/libs/mutagen/trueaudio.py index 1c8d56c4..e62f4556 100644 --- a/libs/mutagen/trueaudio.py +++ b/libs/mutagen/trueaudio.py @@ -1,16 +1,16 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """True Audio audio stream information and tags. True Audio is a lossless format designed for real-time encoding and decoding. This module is based on the documentation at -http://www.true-audio.com/TTA_Lossless_Audio_Codec\_-_Format_Description +http://www.true-audio.com/TTA_Lossless_Audio_Codec\\_-_Format_Description True Audio files use ID3 tags. 
""" @@ -20,27 +20,31 @@ __all__ = ["TrueAudio", "Open", "delete", "EasyTrueAudio"] from ._compat import endswith from mutagen import StreamInfo from mutagen.id3 import ID3FileType, delete -from mutagen._util import cdata, MutagenError +from mutagen._util import cdata, MutagenError, convert_error -class error(RuntimeError, MutagenError): +class error(MutagenError): pass -class TrueAudioHeaderError(error, IOError): +class TrueAudioHeaderError(error): pass class TrueAudioInfo(StreamInfo): - """True Audio stream information. + """TrueAudioInfo() + + True Audio stream information. Attributes: - - * length - audio length, in seconds - * sample_rate - audio sample rate, in Hz + length (`float`): audio length, in seconds + sample_rate (`int`): audio sample rate, in Hz """ + @convert_error(IOError, TrueAudioHeaderError) def __init__(self, fileobj, offset): + """Raises TrueAudioHeaderError""" + fileobj.seek(offset or 0) header = fileobj.read(18) if len(header) != 18 or not header.startswith(b"TTA"): @@ -55,10 +59,17 @@ class TrueAudioInfo(StreamInfo): class TrueAudio(ID3FileType): - """A True Audio file. + """TrueAudio(filething, ID3=None) - :ivar info: :class:`TrueAudioInfo` - :ivar tags: :class:`ID3 <mutagen.id3.ID3>` + A True Audio file. + + Arguments: + filething (filething) + ID3 (mutagen.id3.ID3) + + Attributes: + info (`TrueAudioInfo`) + tags (`mutagen.id3.ID3`) """ _Info = TrueAudioInfo @@ -74,10 +85,17 @@ Open = TrueAudio class EasyTrueAudio(TrueAudio): - """Like MP3, but uses EasyID3 for tags. + """EasyTrueAudio(filething, ID3=None) - :ivar info: :class:`TrueAudioInfo` - :ivar tags: :class:`EasyID3 <mutagen.easyid3.EasyID3>` + Like MP3, but uses EasyID3 for tags. 
+ + Arguments: + filething (filething) + ID3 (mutagen.id3.ID3) + + Attributes: + info (`TrueAudioInfo`) + tags (`mutagen.easyid3.EasyID3`) """ from mutagen.easyid3 import EasyID3 as ID3 diff --git a/libs/mutagen/wavpack.py b/libs/mutagen/wavpack.py index 80710f6d..290b90c3 100644 --- a/libs/mutagen/wavpack.py +++ b/libs/mutagen/wavpack.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """WavPack reading and writing. @@ -21,7 +21,7 @@ __all__ = ["WavPack", "Open", "delete"] from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen._util import cdata +from mutagen._util import cdata, convert_error class WavPackHeaderError(error): @@ -47,6 +47,7 @@ class _WavPackHeader(object): self.crc = crc @classmethod + @convert_error(IOError, WavPackHeaderError) def from_fileobj(cls, fileobj): """A new _WavPackHeader or raises WavPackHeaderError""" @@ -74,11 +75,10 @@ class WavPackInfo(StreamInfo): """WavPack stream information. 
Attributes: - - * channels - number of audio channels (1 or 2) - * length - file length in seconds, as a float - * sample_rate - audio sampling rate in Hz - * version - WavPack stream version + channels (int): number of audio channels (1 or 2) + length (float: file length in seconds, as a float + sample_rate (int): audio sampling rate in Hz + version (int) WavPack stream version """ def __init__(self, fileobj): diff --git a/libs/unidecode/__init__.py b/libs/unidecode/__init__.py index 3b68de4c..5d968fdb 100644 --- a/libs/unidecode/__init__.py +++ b/libs/unidecode/__init__.py @@ -3,7 +3,7 @@ """Transliterate Unicode text into plain 7-bit ASCII. Example usage: ->>> from unidecode import unidecode: +>>> from unidecode import unidecode >>> unidecode(u"\u5317\u4EB0") "Bei Jing " diff --git a/libs/unidecode/x000.py b/libs/unidecode/x000.py index c3f8f515..27e8d684 100644 --- a/libs/unidecode/x000.py +++ b/libs/unidecode/x000.py @@ -76,9 +76,9 @@ data = ( '1', # 0xb9 'o', # 0xba '>>', # 0xbb -'1/4', # 0xbc -'1/2', # 0xbd -'3/4', # 0xbe +' 1/4 ', # 0xbc +' 1/2 ', # 0xbd +' 3/4 ', # 0xbe '?', # 0xbf 'A', # 0xc0 'A', # 0xc1 diff --git a/libs/unidecode/x002.py b/libs/unidecode/x002.py index ea45441e..d7028cdf 100644 --- a/libs/unidecode/x002.py +++ b/libs/unidecode/x002.py @@ -175,7 +175,7 @@ data = ( ']]', # 0xad 'h', # 0xae 'h', # 0xaf -'k', # 0xb0 +'h', # 0xb0 'h', # 0xb1 'j', # 0xb2 'r', # 0xb3 diff --git a/libs/unidecode/x005.py b/libs/unidecode/x005.py index 2913ffff..ced54426 100644 --- a/libs/unidecode/x005.py +++ b/libs/unidecode/x005.py @@ -161,7 +161,7 @@ data = ( '', # 0x9f '', # 0xa0 '', # 0xa1 -'[?]', # 0xa2 +'', # 0xa2 '', # 0xa3 '', # 0xa4 '', # 0xa5 @@ -185,20 +185,20 @@ data = ( 'a', # 0xb7 'a', # 0xb8 'o', # 0xb9 -'[?]', # 0xba +'o', # 0xba 'u', # 0xbb '\'', # 0xbc '', # 0xbd -'', # 0xbe -'', # 0xbf +'-', # 0xbe +'-', # 0xbf '|', # 0xc0 '', # 0xc1 '', # 0xc2 ':', # 0xc3 '', # 0xc4 -'[?]', # 0xc5 -'[?]', # 0xc6 -'[?]', # 0xc7 +'', # 0xc5 +'n', # 0xc6 +'o', # 
0xc7 '[?]', # 0xc8 '[?]', # 0xc9 '[?]', # 0xca @@ -207,14 +207,14 @@ data = ( '[?]', # 0xcd '[?]', # 0xce '[?]', # 0xcf -'', # 0xd0 +'A', # 0xd0 'b', # 0xd1 'g', # 0xd2 'd', # 0xd3 'h', # 0xd4 'v', # 0xd5 'z', # 0xd6 -'kh', # 0xd7 +'KH', # 0xd7 't', # 0xd8 'y', # 0xd9 'k', # 0xda @@ -228,11 +228,11 @@ data = ( '`', # 0xe2 'p', # 0xe3 'p', # 0xe4 -'ts', # 0xe5 -'ts', # 0xe6 +'TS', # 0xe5 +'TS', # 0xe6 'q', # 0xe7 'r', # 0xe8 -'sh', # 0xe9 +'SH', # 0xe9 't', # 0xea '[?]', # 0xeb '[?]', # 0xec @@ -240,13 +240,13 @@ data = ( '[?]', # 0xee '[?]', # 0xef 'V', # 0xf0 -'oy', # 0xf1 +'OY', # 0xf1 'i', # 0xf2 '\'', # 0xf3 '"', # 0xf4 -'[?]', # 0xf5 -'[?]', # 0xf6 -'[?]', # 0xf7 +'v', # 0xf5 +'n', # 0xf6 +'q', # 0xf7 '[?]', # 0xf8 '[?]', # 0xf9 '[?]', # 0xfa diff --git a/libs/unidecode/x01d.py b/libs/unidecode/x01d.py index 5b659060..83da3183 100644 --- a/libs/unidecode/x01d.py +++ b/libs/unidecode/x01d.py @@ -1,39 +1,39 @@ data = ( -'', # 0x00 -'', # 0x01 -'', # 0x02 -'', # 0x03 -'', # 0x04 -'', # 0x05 -'', # 0x06 -'', # 0x07 -'', # 0x08 -'', # 0x09 -'', # 0x0a -'', # 0x0b -'', # 0x0c -'', # 0x0d -'', # 0x0e -'', # 0x0f +'A', # 0x00 +'AE', # 0x01 +'ae', # 0x02 +'B', # 0x03 +'C', # 0x04 +'D', # 0x05 +'D', # 0x06 +'E', # 0x07 +'e', # 0x08 +'i', # 0x09 +'J', # 0x0a +'K', # 0x0b +'L', # 0x0c +'M', # 0x0d +'N', # 0x0e +'O', # 0x0f '', # 0x10 -'', # 0x11 +'O', # 0x11 '', # 0x12 -'', # 0x13 -'', # 0x14 -'', # 0x15 +'O', # 0x13 +'Oe', # 0x14 +'Ou', # 0x15 '', # 0x16 '', # 0x17 -'', # 0x18 -'', # 0x19 -'', # 0x1a -'', # 0x1b -'', # 0x1c -'', # 0x1d -'', # 0x1e -'', # 0x1f -'', # 0x20 -'', # 0x21 -'', # 0x22 +'P', # 0x18 +'R', # 0x19 +'R', # 0x1a +'T', # 0x1b +'U', # 0x1c +'u', # 0x1d +'u', # 0x1e +'m', # 0x1f +'V', # 0x20 +'W', # 0x21 +'Z', # 0x22 '', # 0x23 '', # 0x24 '', # 0x25 @@ -43,68 +43,68 @@ data = ( '', # 0x29 '', # 0x2a '', # 0x2b -'', # 0x2c -'', # 0x2d -'', # 0x2e -'', # 0x2f -'', # 0x30 -'', # 0x31 -'', # 0x32 -'', # 0x33 -'', # 0x34 -'', # 0x35 -'', # 0x36 -'', # 0x37 
-'', # 0x38 -'', # 0x39 -'', # 0x3a -'', # 0x3b -'', # 0x3c -'', # 0x3d -'', # 0x3e -'', # 0x3f -'', # 0x40 -'', # 0x41 -'', # 0x42 -'', # 0x43 -'', # 0x44 -'', # 0x45 -'', # 0x46 -'', # 0x47 -'', # 0x48 -'', # 0x49 +'A', # 0x2c +'AE', # 0x2d +'B', # 0x2e +'B', # 0x2f +'D', # 0x30 +'E', # 0x31 +'E', # 0x32 +'G', # 0x33 +'H', # 0x34 +'I', # 0x35 +'J', # 0x36 +'K', # 0x37 +'L', # 0x38 +'M', # 0x39 +'N', # 0x3a +'N', # 0x3b +'O', # 0x3c +'Ou', # 0x3d +'P', # 0x3e +'R', # 0x3f +'T', # 0x40 +'U', # 0x41 +'W', # 0x42 +'a', # 0x43 +'a', # 0x44 +'a', # 0x45 +'ae', # 0x46 +'b', # 0x47 +'d', # 0x48 +'e', # 0x49 '', # 0x4a -'', # 0x4b -'', # 0x4c -'', # 0x4d -'', # 0x4e -'', # 0x4f -'', # 0x50 +'e', # 0x4b +'e', # 0x4c +'g', # 0x4d +'i', # 0x4e +'k', # 0x4f +'m', # 0x50 '', # 0x51 -'', # 0x52 +'o', # 0x52 '', # 0x53 '', # 0x54 '', # 0x55 -'', # 0x56 -'', # 0x57 -'', # 0x58 -'', # 0x59 -'', # 0x5a -'', # 0x5b +'p', # 0x56 +'t', # 0x57 +'u', # 0x58 +'u', # 0x59 +'m', # 0x5a +'v', # 0x5b '', # 0x5c -'', # 0x5d -'', # 0x5e -'', # 0x5f -'', # 0x60 +'b', # 0x5d +'g', # 0x5e +'d', # 0x5f +'f', # 0x60 '', # 0x61 -'', # 0x62 -'', # 0x63 -'', # 0x64 -'', # 0x65 -'', # 0x66 -'', # 0x67 -'', # 0x68 -'', # 0x69 +'i', # 0x62 +'r', # 0x63 +'u', # 0x64 +'v', # 0x65 +'b', # 0x66 +'g', # 0x67 +'r', # 0x68 +'f', # 0x69 '', # 0x6a '', # 0x6b 'b', # 0x6c diff --git a/libs/unidecode/x020.py b/libs/unidecode/x020.py index b6494730..46425bf0 100644 --- a/libs/unidecode/x020.py +++ b/libs/unidecode/x020.py @@ -94,7 +94,7 @@ data = ( '[?]', # 0x5c '[?]', # 0x5d '[?]', # 0x5e -'[?]', # 0x5f +' ', # 0x5f '', # 0x60 '[?]', # 0x61 '[?]', # 0x62 @@ -112,7 +112,7 @@ data = ( '', # 0x6e '', # 0x6f '0', # 0x70 -'', # 0x71 +'i', # 0x71 '', # 0x72 '', # 0x73 '4', # 0x74 @@ -143,19 +143,19 @@ data = ( '(', # 0x8d ')', # 0x8e '[?]', # 0x8f -'[?]', # 0x90 -'[?]', # 0x91 -'[?]', # 0x92 -'[?]', # 0x93 +'a', # 0x90 +'e', # 0x91 +'o', # 0x92 +'x', # 0x93 '[?]', # 0x94 -'[?]', # 0x95 -'[?]', # 0x96 -'[?]', # 0x97 
-'[?]', # 0x98 -'[?]', # 0x99 -'[?]', # 0x9a -'[?]', # 0x9b -'[?]', # 0x9c +'h', # 0x95 +'k', # 0x96 +'l', # 0x97 +'m', # 0x98 +'n', # 0x99 +'p', # 0x9a +'s', # 0x9b +'t', # 0x9c '[?]', # 0x9d '[?]', # 0x9e '[?]', # 0x9f @@ -175,22 +175,22 @@ data = ( 'K', # 0xad 'T', # 0xae 'Dr', # 0xaf -'[?]', # 0xb0 -'[?]', # 0xb1 -'[?]', # 0xb2 -'[?]', # 0xb3 -'[?]', # 0xb4 -'[?]', # 0xb5 -'[?]', # 0xb6 -'[?]', # 0xb7 -'[?]', # 0xb8 -'[?]', # 0xb9 -'[?]', # 0xba -'[?]', # 0xbb -'[?]', # 0xbc -'[?]', # 0xbd -'[?]', # 0xbe -'[?]', # 0xbf +'Pf', # 0xb0 +'P', # 0xb1 +'G', # 0xb2 +'A', # 0xb3 +'UAH', # 0xb4 +'C|', # 0xb5 +'L', # 0xb6 +'Sm', # 0xb7 +'T', # 0xb8 +'Rs', # 0xb9 +'L', # 0xba +'M', # 0xbb +'m', # 0xbc +'R', # 0xbd +'l', # 0xbe +'BTC', # 0xbf '[?]', # 0xc0 '[?]', # 0xc1 '[?]', # 0xc2 diff --git a/libs/unidecode/x021.py b/libs/unidecode/x021.py index 067d9bdc..29f05fd4 100644 --- a/libs/unidecode/x021.py +++ b/libs/unidecode/x021.py @@ -1,33 +1,33 @@ data = ( -'', # 0x00 -'', # 0x01 +' a/c ', # 0x00 +' a/s ', # 0x01 'C', # 0x02 '', # 0x03 '', # 0x04 -'', # 0x05 -'', # 0x06 +' c/o ', # 0x05 +' c/u ', # 0x06 '', # 0x07 '', # 0x08 '', # 0x09 -'', # 0x0a -'', # 0x0b -'', # 0x0c +'g', # 0x0a +'H', # 0x0b +'H', # 0x0c 'H', # 0x0d -'', # 0x0e +'h', # 0x0e '', # 0x0f -'', # 0x10 -'', # 0x11 -'', # 0x12 -'', # 0x13 +'I', # 0x10 +'I', # 0x11 +'L', # 0x12 +'l', # 0x13 '', # 0x14 'N', # 0x15 -'', # 0x16 +'No. 
', # 0x16 '', # 0x17 '', # 0x18 'P', # 0x19 'Q', # 0x1a -'', # 0x1b -'', # 0x1c +'R', # 0x1b +'R', # 0x1c 'R', # 0x1d '', # 0x1e '', # 0x1f @@ -39,24 +39,24 @@ data = ( '', # 0x25 '', # 0x26 '', # 0x27 -'', # 0x28 +'Z', # 0x28 '', # 0x29 'K', # 0x2a 'A', # 0x2b -'', # 0x2c -'', # 0x2d +'B', # 0x2c +'C', # 0x2d 'e', # 0x2e 'e', # 0x2f 'E', # 0x30 'F', # 0x31 'F', # 0x32 'M', # 0x33 -'', # 0x34 +'o', # 0x34 '', # 0x35 '', # 0x36 '', # 0x37 '', # 0x38 -'', # 0x39 +'i', # 0x39 '', # 0x3a 'FAX', # 0x3b '', # 0x3c @@ -79,9 +79,9 @@ data = ( '[?]', # 0x4d 'F', # 0x4e '[?]', # 0x4f -'[?]', # 0x50 -'[?]', # 0x51 -'[?]', # 0x52 +' 1/7 ', # 0x50 +' 1/9 ', # 0x51 +' 1/10 ', # 0x52 ' 1/3 ', # 0x53 ' 2/3 ', # 0x54 ' 1/5 ', # 0x55 @@ -136,7 +136,7 @@ data = ( '[?]', # 0x86 '[?]', # 0x87 '[?]', # 0x88 -'[?]', # 0x89 +' 0/3 ', # 0x89 '[?]', # 0x8a '[?]', # 0x8b '[?]', # 0x8c diff --git a/libs/unidecode/x024.py b/libs/unidecode/x024.py index 20b3c8f1..231b0ca1 100644 --- a/libs/unidecode/x024.py +++ b/libs/unidecode/x024.py @@ -181,32 +181,32 @@ data = ( '(x)', # 0xb3 '(y)', # 0xb4 '(z)', # 0xb5 -'a', # 0xb6 -'b', # 0xb7 -'c', # 0xb8 -'d', # 0xb9 -'e', # 0xba -'f', # 0xbb -'g', # 0xbc -'h', # 0xbd -'i', # 0xbe -'j', # 0xbf -'k', # 0xc0 -'l', # 0xc1 -'m', # 0xc2 -'n', # 0xc3 -'o', # 0xc4 -'p', # 0xc5 -'q', # 0xc6 -'r', # 0xc7 -'s', # 0xc8 -'t', # 0xc9 -'u', # 0xca -'v', # 0xcb -'w', # 0xcc -'x', # 0xcd -'y', # 0xce -'z', # 0xcf +'A', # 0xb6 +'B', # 0xb7 +'C', # 0xb8 +'D', # 0xb9 +'E', # 0xba +'F', # 0xbb +'G', # 0xbc +'H', # 0xbd +'I', # 0xbe +'J', # 0xbf +'K', # 0xc0 +'L', # 0xc1 +'M', # 0xc2 +'N', # 0xc3 +'O', # 0xc4 +'P', # 0xc5 +'Q', # 0xc6 +'R', # 0xc7 +'S', # 0xc8 +'T', # 0xc9 +'U', # 0xca +'V', # 0xcb +'W', # 0xcc +'X', # 0xcd +'Y', # 0xce +'Z', # 0xcf 'a', # 0xd0 'b', # 0xd1 'c', # 0xd2 @@ -234,24 +234,25 @@ data = ( 'y', # 0xe8 'z', # 0xe9 '0', # 0xea -'[?]', # 0xeb -'[?]', # 0xec -'[?]', # 0xed -'[?]', # 0xee -'[?]', # 0xef -'[?]', # 0xf0 -'[?]', # 0xf1 -'[?]', # 0xf2 
-'[?]', # 0xf3 -'[?]', # 0xf4 -'[?]', # 0xf5 -'[?]', # 0xf6 -'[?]', # 0xf7 -'[?]', # 0xf8 -'[?]', # 0xf9 -'[?]', # 0xfa -'[?]', # 0xfb -'[?]', # 0xfc -'[?]', # 0xfd -'[?]', # 0xfe +'11', # 0xeb +'12', # 0xec +'13', # 0xed +'14', # 0xee +'15', # 0xef +'16', # 0xf0 +'17', # 0xf1 +'18', # 0xf2 +'19', # 0xf3 +'20', # 0xf4 +'1', # 0xf5 +'2', # 0xf6 +'3', # 0xf7 +'4', # 0xf8 +'5', # 0xf9 +'6', # 0xfa +'7', # 0xfb +'8', # 0xfc +'9', # 0xfd +'10', # 0xfe +'0', # 0xff ) diff --git a/libs/unidecode/x032.py b/libs/unidecode/x032.py index 30282d4a..a0c21d11 100644 --- a/libs/unidecode/x032.py +++ b/libs/unidecode/x032.py @@ -203,10 +203,10 @@ data = ( '10M', # 0xc9 '11M', # 0xca '12M', # 0xcb -'[?]', # 0xcc -'[?]', # 0xcd -'[?]', # 0xce -'[?]', # 0xcf +'Hg', # 0xcc +'erg', # 0xcd +'eV', # 0xce +'LTD', # 0xcf 'a', # 0xd0 'i', # 0xd1 'u', # 0xd2 diff --git a/libs/unidecode/x033.py b/libs/unidecode/x033.py index 64eb651a..85310611 100644 --- a/libs/unidecode/x033.py +++ b/libs/unidecode/x033.py @@ -112,16 +112,16 @@ data = ( '22h', # 0x6e '23h', # 0x6f '24h', # 0x70 -'HPA', # 0x71 +'hPa', # 0x71 'da', # 0x72 'AU', # 0x73 'bar', # 0x74 'oV', # 0x75 'pc', # 0x76 -'[?]', # 0x77 -'[?]', # 0x78 -'[?]', # 0x79 -'[?]', # 0x7a +'dm', # 0x77 +'dm^2', # 0x78 +'dm^3', # 0x79 +'IU', # 0x7a 'Heisei', # 0x7b 'Syouwa', # 0x7c 'Taisyou', # 0x7d @@ -129,7 +129,7 @@ data = ( 'Inc.', # 0x7f 'pA', # 0x80 'nA', # 0x81 -'microamp', # 0x82 +'uA', # 0x82 'mA', # 0x83 'kA', # 0x84 'kB', # 0x85 @@ -139,8 +139,8 @@ data = ( 'kcal', # 0x89 'pF', # 0x8a 'nF', # 0x8b -'microFarad', # 0x8c -'microgram', # 0x8d +'uF', # 0x8c +'ug', # 0x8d 'mg', # 0x8e 'kg', # 0x8f 'Hz', # 0x90 @@ -148,13 +148,13 @@ data = ( 'MHz', # 0x92 'GHz', # 0x93 'THz', # 0x94 -'microliter', # 0x95 +'ul', # 0x95 'ml', # 0x96 'dl', # 0x97 'kl', # 0x98 'fm', # 0x99 'nm', # 0x9a -'micrometer', # 0x9b +'um', # 0x9b 'mm', # 0x9c 'cm', # 0x9d 'km', # 0x9e @@ -162,7 +162,7 @@ data = ( 'cm^2', # 0xa0 'm^2', # 0xa1 'km^2', # 0xa2 -'mm^4', # 0xa3 
+'mm^3', # 0xa3 'cm^3', # 0xa4 'm^3', # 0xa5 'km^3', # 0xa6 @@ -177,17 +177,17 @@ data = ( 'rad/s^2', # 0xaf 'ps', # 0xb0 'ns', # 0xb1 -'microsecond', # 0xb2 +'us', # 0xb2 'ms', # 0xb3 'pV', # 0xb4 'nV', # 0xb5 -'microvolt', # 0xb6 +'uV', # 0xb6 'mV', # 0xb7 'kV', # 0xb8 'MV', # 0xb9 'pW', # 0xba 'nW', # 0xbb -'microwatt', # 0xbc +'uW', # 0xbc 'mW', # 0xbd 'kW', # 0xbe 'MW', # 0xbf @@ -221,8 +221,8 @@ data = ( 'sr', # 0xdb 'Sv', # 0xdc 'Wb', # 0xdd -'[?]', # 0xde -'[?]', # 0xdf +'V/m', # 0xde +'A/m', # 0xdf '1d', # 0xe0 '2d', # 0xe1 '3d', # 0xe2 @@ -254,4 +254,5 @@ data = ( '29d', # 0xfc '30d', # 0xfd '31d', # 0xfe +'gal', # 0xff ) diff --git a/libs/unidecode/x1f1.py b/libs/unidecode/x1f1.py new file mode 100644 index 00000000..ba0481fc --- /dev/null +++ b/libs/unidecode/x1f1.py @@ -0,0 +1,258 @@ +data = ( +'0.', # 0x00 +'0,', # 0x01 +'1,', # 0x02 +'2,', # 0x03 +'3,', # 0x04 +'4,', # 0x05 +'5,', # 0x06 +'6,', # 0x07 +'7,', # 0x08 +'8,', # 0x09 +'9,', # 0x0a +'', # 0x0b +'', # 0x0c +'', # 0x0d +'', # 0x0e +'', # 0x0f +'(A)', # 0x10 +'(B)', # 0x11 +'(C)', # 0x12 +'(D)', # 0x13 +'(E)', # 0x14 +'(F)', # 0x15 +'(G)', # 0x16 +'(H)', # 0x17 +'(I)', # 0x18 +'(J)', # 0x19 +'(K)', # 0x1a +'(L)', # 0x1b +'(M)', # 0x1c +'(N)', # 0x1d +'(O)', # 0x1e +'(P)', # 0x1f +'(Q)', # 0x20 +'(R)', # 0x21 +'(S)', # 0x22 +'(T)', # 0x23 +'(U)', # 0x24 +'(V)', # 0x25 +'(W)', # 0x26 +'(X)', # 0x27 +'(Y)', # 0x28 +'(Z)', # 0x29 +'', # 0x2a +'', # 0x2b +'', # 0x2c +'', # 0x2d +'', # 0x2e +'', # 0x2f +'', # 0x30 +'', # 0x31 +'', # 0x32 +'', # 0x33 +'', # 0x34 +'', # 0x35 +'', # 0x36 +'', # 0x37 +'', # 0x38 +'', # 0x39 +'', # 0x3a +'', # 0x3b +'', # 0x3c +'', # 0x3d +'', # 0x3e +'', # 0x3f +'', # 0x40 +'', # 0x41 +'', # 0x42 +'', # 0x43 +'', # 0x44 +'', # 0x45 +'', # 0x46 +'', # 0x47 +'', # 0x48 +'', # 0x49 +'', # 0x4a +'', # 0x4b +'', # 0x4c +'', # 0x4d +'', # 0x4e +'', # 0x4f +'', # 0x50 +'', # 0x51 +'', # 0x52 +'', # 0x53 +'', # 0x54 +'', # 0x55 +'', # 0x56 +'', # 0x57 +'', # 0x58 +'', # 0x59 
+'', # 0x5a +'', # 0x5b +'', # 0x5c +'', # 0x5d +'', # 0x5e +'', # 0x5f +'', # 0x60 +'', # 0x61 +'', # 0x62 +'', # 0x63 +'', # 0x64 +'', # 0x65 +'', # 0x66 +'', # 0x67 +'', # 0x68 +'', # 0x69 +'', # 0x6a +'', # 0x6b +'', # 0x6c +'', # 0x6d +'', # 0x6e +'', # 0x6f +'', # 0x70 +'', # 0x71 +'', # 0x72 +'', # 0x73 +'', # 0x74 +'', # 0x75 +'', # 0x76 +'', # 0x77 +'', # 0x78 +'', # 0x79 +'', # 0x7a +'', # 0x7b +'', # 0x7c +'', # 0x7d +'', # 0x7e +'', # 0x7f +'', # 0x80 +'', # 0x81 +'', # 0x82 +'', # 0x83 +'', # 0x84 +'', # 0x85 +'', # 0x86 +'', # 0x87 +'', # 0x88 +'', # 0x89 +'', # 0x8a +'', # 0x8b +'', # 0x8c +'', # 0x8d +'', # 0x8e +'', # 0x8f +'', # 0x90 +'', # 0x91 +'', # 0x92 +'', # 0x93 +'', # 0x94 +'', # 0x95 +'', # 0x96 +'', # 0x97 +'', # 0x98 +'', # 0x99 +'', # 0x9a +'', # 0x9b +'', # 0x9c +'', # 0x9d +'', # 0x9e +'', # 0x9f +'', # 0xa0 +'', # 0xa1 +'', # 0xa2 +'', # 0xa3 +'', # 0xa4 +'', # 0xa5 +'', # 0xa6 +'', # 0xa7 +'', # 0xa8 +'', # 0xa9 +'', # 0xaa +'', # 0xab +'', # 0xac +'', # 0xad +'', # 0xae +'', # 0xaf +'', # 0xb0 +'', # 0xb1 +'', # 0xb2 +'', # 0xb3 +'', # 0xb4 +'', # 0xb5 +'', # 0xb6 +'', # 0xb7 +'', # 0xb8 +'', # 0xb9 +'', # 0xba +'', # 0xbb +'', # 0xbc +'', # 0xbd +'', # 0xbe +'', # 0xbf +'', # 0xc0 +'', # 0xc1 +'', # 0xc2 +'', # 0xc3 +'', # 0xc4 +'', # 0xc5 +'', # 0xc6 +'', # 0xc7 +'', # 0xc8 +'', # 0xc9 +'', # 0xca +'', # 0xcb +'', # 0xcc +'', # 0xcd +'', # 0xce +'', # 0xcf +'', # 0xd0 +'', # 0xd1 +'', # 0xd2 +'', # 0xd3 +'', # 0xd4 +'', # 0xd5 +'', # 0xd6 +'', # 0xd7 +'', # 0xd8 +'', # 0xd9 +'', # 0xda +'', # 0xdb +'', # 0xdc +'', # 0xdd +'', # 0xde +'', # 0xdf +'', # 0xe0 +'', # 0xe1 +'', # 0xe2 +'', # 0xe3 +'', # 0xe4 +'', # 0xe5 +'', # 0xe6 +'', # 0xe7 +'', # 0xe8 +'', # 0xe9 +'', # 0xea +'', # 0xeb +'', # 0xec +'', # 0xed +'', # 0xee +'', # 0xef +'', # 0xf0 +'', # 0xf1 +'', # 0xf2 +'', # 0xf3 +'', # 0xf4 +'', # 0xf5 +'', # 0xf6 +'', # 0xf7 +'', # 0xf8 +'', # 0xf9 +'', # 0xfa +'', # 0xfb +'', # 0xfc +'', # 0xfd +'', # 0xfe +'', # 0xff +) diff 
--git a/libs/yaml/__init__.py b/libs/yaml/__init__.py index 76e19e13..9e35fe29 100644 --- a/libs/yaml/__init__.py +++ b/libs/yaml/__init__.py @@ -1,21 +1,22 @@ -from error import * +from .error import * -from tokens import * -from events import * -from nodes import * +from .tokens import * +from .events import * +from .nodes import * -from loader import * -from dumper import * - -__version__ = '3.11' +from .loader import * +from .dumper import * +__version__ = '3.13' try: - from cyaml import * + from .cyaml import * __with_libyaml__ = True except ImportError: __with_libyaml__ = False +import io + def scan(stream, Loader=Loader): """ Scan a YAML stream and produce scanning tokens. @@ -109,8 +110,7 @@ def emit(events, stream=None, Dumper=Dumper, """ getvalue = None if stream is None: - from StringIO import StringIO - stream = StringIO() + stream = io.StringIO() getvalue = stream.getvalue dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, allow_unicode=allow_unicode, line_break=line_break) @@ -125,7 +125,7 @@ def emit(events, stream=None, Dumper=Dumper, def serialize_all(nodes, stream=None, Dumper=Dumper, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, + encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None): """ Serialize a sequence of representation trees into a YAML stream. 
@@ -134,10 +134,9 @@ def serialize_all(nodes, stream=None, Dumper=Dumper, getvalue = None if stream is None: if encoding is None: - from StringIO import StringIO + stream = io.StringIO() else: - from cStringIO import StringIO - stream = StringIO() + stream = io.BytesIO() getvalue = stream.getvalue dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, allow_unicode=allow_unicode, line_break=line_break, @@ -164,7 +163,7 @@ def dump_all(documents, stream=None, Dumper=Dumper, default_style=None, default_flow_style=None, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, + encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None): """ Serialize a sequence of Python objects into a YAML stream. @@ -173,10 +172,9 @@ def dump_all(documents, stream=None, Dumper=Dumper, getvalue = None if stream is None: if encoding is None: - from StringIO import StringIO + stream = io.StringIO() else: - from cStringIO import StringIO - stream = StringIO() + stream = io.BytesIO() getvalue = stream.getvalue dumper = Dumper(stream, default_style=default_style, default_flow_style=default_flow_style, @@ -283,13 +281,12 @@ class YAMLObjectMetaclass(type): cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) cls.yaml_dumper.add_representer(cls, cls.to_yaml) -class YAMLObject(object): +class YAMLObject(metaclass=YAMLObjectMetaclass): """ An object that can dump itself to a YAML stream and load itself from a YAML stream. """ - __metaclass__ = YAMLObjectMetaclass __slots__ = () # no direct instantiation, so allow immutable subclasses yaml_loader = Loader @@ -298,18 +295,18 @@ class YAMLObject(object): yaml_tag = None yaml_flow_style = None + @classmethod def from_yaml(cls, loader, node): """ Convert a representation node to a Python object. 
""" return loader.construct_yaml_object(node, cls) - from_yaml = classmethod(from_yaml) + @classmethod def to_yaml(cls, dumper, data): """ Convert a Python object to a representation node. """ return dumper.represent_yaml_object(cls.yaml_tag, data, cls, flow_style=cls.yaml_flow_style) - to_yaml = classmethod(to_yaml) diff --git a/libs/yaml/composer.py b/libs/yaml/composer.py index 06e5ac78..d5c6a7ac 100644 --- a/libs/yaml/composer.py +++ b/libs/yaml/composer.py @@ -1,14 +1,14 @@ __all__ = ['Composer', 'ComposerError'] -from error import MarkedYAMLError -from events import * -from nodes import * +from .error import MarkedYAMLError +from .events import * +from .nodes import * class ComposerError(MarkedYAMLError): pass -class Composer(object): +class Composer: def __init__(self): self.anchors = {} @@ -66,14 +66,14 @@ class Composer(object): anchor = event.anchor if anchor not in self.anchors: raise ComposerError(None, None, "found undefined alias %r" - % anchor.encode('utf-8'), event.start_mark) + % anchor, event.start_mark) return self.anchors[anchor] event = self.peek_event() anchor = event.anchor if anchor is not None: if anchor in self.anchors: raise ComposerError("found duplicate anchor %r; first occurence" - % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + % anchor, self.anchors[anchor].start_mark, "second occurence", event.start_mark) self.descend_resolver(parent, index) if self.check_event(ScalarEvent): @@ -88,7 +88,7 @@ class Composer(object): def compose_scalar_node(self, anchor): event = self.get_event() tag = event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = self.resolve(ScalarNode, event.value, event.implicit) node = ScalarNode(tag, event.value, event.start_mark, event.end_mark, style=event.style) @@ -99,7 +99,7 @@ class Composer(object): def compose_sequence_node(self, anchor): start_event = self.get_event() tag = start_event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = 
self.resolve(SequenceNode, None, start_event.implicit) node = SequenceNode(tag, [], start_event.start_mark, None, @@ -117,7 +117,7 @@ class Composer(object): def compose_mapping_node(self, anchor): start_event = self.get_event() tag = start_event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = self.resolve(MappingNode, None, start_event.implicit) node = MappingNode(tag, [], start_event.start_mark, None, diff --git a/libs/yaml/constructor.py b/libs/yaml/constructor.py index 635faac3..981543ae 100644 --- a/libs/yaml/constructor.py +++ b/libs/yaml/constructor.py @@ -2,17 +2,15 @@ __all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', 'ConstructorError'] -from error import * -from nodes import * +from .error import * +from .nodes import * -import datetime - -import binascii, re, sys, types +import collections, datetime, base64, binascii, re, sys, types class ConstructorError(MarkedYAMLError): pass -class BaseConstructor(object): +class BaseConstructor: yaml_constructors = {} yaml_multi_constructors = {} @@ -90,7 +88,7 @@ class BaseConstructor(object): data = constructor(self, tag_suffix, node) if isinstance(data, types.GeneratorType): generator = data - data = generator.next() + data = next(generator) if self.deep_construct: for dummy in generator: pass @@ -125,11 +123,9 @@ class BaseConstructor(object): mapping = {} for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError, exc: + if not isinstance(key, collections.Hashable): raise ConstructorError("while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % exc, key_node.start_mark) + "found unhashable key", key_node.start_mark) value = self.construct_object(value_node, deep=deep) mapping[key] = value return mapping @@ -146,33 +142,33 @@ class BaseConstructor(object): pairs.append((key, value)) return pairs + @classmethod def add_constructor(cls, tag, constructor): if not 'yaml_constructors' 
in cls.__dict__: cls.yaml_constructors = cls.yaml_constructors.copy() cls.yaml_constructors[tag] = constructor - add_constructor = classmethod(add_constructor) + @classmethod def add_multi_constructor(cls, tag_prefix, multi_constructor): if not 'yaml_multi_constructors' in cls.__dict__: cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() cls.yaml_multi_constructors[tag_prefix] = multi_constructor - add_multi_constructor = classmethod(add_multi_constructor) class SafeConstructor(BaseConstructor): def construct_scalar(self, node): if isinstance(node, MappingNode): for key_node, value_node in node.value: - if key_node.tag == u'tag:yaml.org,2002:value': + if key_node.tag == 'tag:yaml.org,2002:value': return self.construct_scalar(value_node) - return BaseConstructor.construct_scalar(self, node) + return super().construct_scalar(node) def flatten_mapping(self, node): merge = [] index = 0 while index < len(node.value): key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': + if key_node.tag == 'tag:yaml.org,2002:merge': del node.value[index] if isinstance(value_node, MappingNode): self.flatten_mapping(value_node) @@ -194,8 +190,8 @@ class SafeConstructor(BaseConstructor): raise ConstructorError("while constructing a mapping", node.start_mark, "expected a mapping or list of mappings for merging, but found %s" % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' index += 1 else: index += 1 @@ -205,19 +201,19 @@ class SafeConstructor(BaseConstructor): def construct_mapping(self, node, deep=False): if isinstance(node, MappingNode): self.flatten_mapping(node) - return BaseConstructor.construct_mapping(self, node, deep=deep) + return super().construct_mapping(node, deep=deep) def construct_yaml_null(self, node): self.construct_scalar(node) return None bool_values 
= { - u'yes': True, - u'no': False, - u'true': True, - u'false': False, - u'on': True, - u'off': False, + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, } def construct_yaml_bool(self, node): @@ -225,7 +221,7 @@ class SafeConstructor(BaseConstructor): return self.bool_values[value.lower()] def construct_yaml_int(self, node): - value = str(self.construct_scalar(node)) + value = self.construct_scalar(node) value = value.replace('_', '') sign = +1 if value[0] == '-': @@ -258,7 +254,7 @@ class SafeConstructor(BaseConstructor): nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). def construct_yaml_float(self, node): - value = str(self.construct_scalar(node)) + value = self.construct_scalar(node) value = value.replace('_', '').lower() sign = +1 if value[0] == '-': @@ -282,15 +278,23 @@ class SafeConstructor(BaseConstructor): return sign*float(value) def construct_yaml_binary(self, node): - value = self.construct_scalar(node) try: - return str(value).decode('base64') - except (binascii.Error, UnicodeEncodeError), exc: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) timestamp_regexp = re.compile( - ur'''^(?P<year>[0-9][0-9][0-9][0-9]) + r'''^(?P<year>[0-9][0-9][0-9][0-9]) -(?P<month>[0-9][0-9]?) -(?P<day>[0-9][0-9]?) 
(?:(?:[Tt]|[ \t]+) @@ -381,11 +385,7 @@ class SafeConstructor(BaseConstructor): data.update(value) def construct_yaml_str(self, node): - value = self.construct_scalar(node) - try: - return value.encode('ascii') - except UnicodeEncodeError: - return value + return self.construct_scalar(node) def construct_yaml_seq(self, node): data = [] @@ -410,55 +410,55 @@ class SafeConstructor(BaseConstructor): def construct_undefined(self, node): raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + "could not determine a constructor for the tag %r" % node.tag, node.start_mark) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:null', + 'tag:yaml.org,2002:null', SafeConstructor.construct_yaml_null) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:bool', + 'tag:yaml.org,2002:bool', SafeConstructor.construct_yaml_bool) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:int', + 'tag:yaml.org,2002:int', SafeConstructor.construct_yaml_int) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:float', + 'tag:yaml.org,2002:float', SafeConstructor.construct_yaml_float) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:binary', + 'tag:yaml.org,2002:binary', SafeConstructor.construct_yaml_binary) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', + 'tag:yaml.org,2002:timestamp', SafeConstructor.construct_yaml_timestamp) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:omap', + 'tag:yaml.org,2002:omap', SafeConstructor.construct_yaml_omap) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', + 'tag:yaml.org,2002:pairs', SafeConstructor.construct_yaml_pairs) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:set', + 'tag:yaml.org,2002:set', SafeConstructor.construct_yaml_set) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:str', + 'tag:yaml.org,2002:str', SafeConstructor.construct_yaml_str) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:seq', + 
'tag:yaml.org,2002:seq', SafeConstructor.construct_yaml_seq) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:map', + 'tag:yaml.org,2002:map', SafeConstructor.construct_yaml_map) SafeConstructor.add_constructor(None, @@ -467,13 +467,29 @@ SafeConstructor.add_constructor(None, class Constructor(SafeConstructor): def construct_python_str(self, node): - return self.construct_scalar(node).encode('utf-8') + return self.construct_scalar(node) def construct_python_unicode(self, node): return self.construct_scalar(node) + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + def construct_python_long(self, node): - return long(self.construct_yaml_int(node)) + return self.construct_yaml_int(node) def construct_python_complex(self, node): return complex(self.construct_scalar(node)) @@ -487,50 +503,46 @@ class Constructor(SafeConstructor): "expected non-empty name appended to the tag", mark) try: __import__(name) - except ImportError, exc: + except ImportError as exc: raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + "cannot find module %r (%s)" % (name, exc), mark) return sys.modules[name] def find_python_name(self, name, mark): if not name: raise ConstructorError("while constructing a Python object", mark, "expected non-empty name appended to the tag", mark) - if u'.' in name: + if '.' 
in name: module_name, object_name = name.rsplit('.', 1) else: - module_name = '__builtin__' + module_name = 'builtins' object_name = name try: __import__(module_name) - except ImportError, exc: + except ImportError as exc: raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + "cannot find module %r (%s)" % (module_name, exc), mark) module = sys.modules[module_name] if not hasattr(module, object_name): raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" % (object_name.encode('utf-8'), - module.__name__), mark) + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) return getattr(module, object_name) def construct_python_name(self, suffix, node): value = self.construct_scalar(node) if value: raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) + "expected the empty value, but found %r" % value, node.start_mark) return self.find_python_name(suffix, node.start_mark) def construct_python_module(self, suffix, node): value = self.construct_scalar(node) if value: raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) + "expected the empty value, but found %r" % value, node.start_mark) return self.find_python_module(suffix, node.start_mark) - class classobj: pass - def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): if not args: @@ -538,12 +550,7 @@ class Constructor(SafeConstructor): if not kwds: kwds = {} cls = self.find_python_name(suffix, node.start_mark) - if newobj and isinstance(cls, type(self.classobj)) \ - and not args and not kwds: - instance = self.classobj() - instance.__class__ = cls - return instance - elif newobj and isinstance(cls, type): + if newobj 
and isinstance(cls, type): return cls.__new__(cls, *args, **kwds) else: return cls(*args, **kwds) @@ -610,66 +617,70 @@ class Constructor(SafeConstructor): return self.construct_python_object_apply(suffix, node, newobj=True) Constructor.add_constructor( - u'tag:yaml.org,2002:python/none', + 'tag:yaml.org,2002:python/none', Constructor.construct_yaml_null) Constructor.add_constructor( - u'tag:yaml.org,2002:python/bool', + 'tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool) Constructor.add_constructor( - u'tag:yaml.org,2002:python/str', + 'tag:yaml.org,2002:python/str', Constructor.construct_python_str) Constructor.add_constructor( - u'tag:yaml.org,2002:python/unicode', + 'tag:yaml.org,2002:python/unicode', Constructor.construct_python_unicode) Constructor.add_constructor( - u'tag:yaml.org,2002:python/int', + 'tag:yaml.org,2002:python/bytes', + Constructor.construct_python_bytes) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/int', Constructor.construct_yaml_int) Constructor.add_constructor( - u'tag:yaml.org,2002:python/long', + 'tag:yaml.org,2002:python/long', Constructor.construct_python_long) Constructor.add_constructor( - u'tag:yaml.org,2002:python/float', + 'tag:yaml.org,2002:python/float', Constructor.construct_yaml_float) Constructor.add_constructor( - u'tag:yaml.org,2002:python/complex', + 'tag:yaml.org,2002:python/complex', Constructor.construct_python_complex) Constructor.add_constructor( - u'tag:yaml.org,2002:python/list', + 'tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq) Constructor.add_constructor( - u'tag:yaml.org,2002:python/tuple', + 'tag:yaml.org,2002:python/tuple', Constructor.construct_python_tuple) Constructor.add_constructor( - u'tag:yaml.org,2002:python/dict', + 'tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map) Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/name:', + 'tag:yaml.org,2002:python/name:', Constructor.construct_python_name) Constructor.add_multi_constructor( - 
u'tag:yaml.org,2002:python/module:', + 'tag:yaml.org,2002:python/module:', Constructor.construct_python_module) Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object:', + 'tag:yaml.org,2002:python/object:', Constructor.construct_python_object) Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/apply:', + 'tag:yaml.org,2002:python/object/apply:', Constructor.construct_python_object_apply) Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/new:', + 'tag:yaml.org,2002:python/object/new:', Constructor.construct_python_object_new) diff --git a/libs/yaml/cyaml.py b/libs/yaml/cyaml.py index 68dcd751..d5cb87e9 100644 --- a/libs/yaml/cyaml.py +++ b/libs/yaml/cyaml.py @@ -4,12 +4,12 @@ __all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', from _yaml import CParser, CEmitter -from constructor import * +from .constructor import * -from serializer import * -from representer import * +from .serializer import * +from .representer import * -from resolver import * +from .resolver import * class CBaseLoader(CParser, BaseConstructor, BaseResolver): diff --git a/libs/yaml/dumper.py b/libs/yaml/dumper.py index f811d2c9..0b691287 100644 --- a/libs/yaml/dumper.py +++ b/libs/yaml/dumper.py @@ -1,10 +1,10 @@ __all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] -from emitter import * -from serializer import * -from representer import * -from resolver import * +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): diff --git a/libs/yaml/emitter.py b/libs/yaml/emitter.py index e5bcdccc..34cb145a 100644 --- a/libs/yaml/emitter.py +++ b/libs/yaml/emitter.py @@ -8,13 +8,13 @@ __all__ = ['Emitter', 'EmitterError'] -from error import YAMLError -from events import * +from .error import YAMLError +from .events import * class EmitterError(YAMLError): pass -class ScalarAnalysis(object): +class ScalarAnalysis: def __init__(self, 
scalar, empty, multiline, allow_flow_plain, allow_block_plain, allow_single_quoted, allow_double_quoted, @@ -28,11 +28,11 @@ class ScalarAnalysis(object): self.allow_double_quoted = allow_double_quoted self.allow_block = allow_block -class Emitter(object): +class Emitter: DEFAULT_TAG_PREFIXES = { - u'!' : u'!', - u'tag:yaml.org,2002:' : u'!!', + '!' : '!', + 'tag:yaml.org,2002:' : '!!', } def __init__(self, stream, canonical=None, indent=None, width=None, @@ -88,8 +88,8 @@ class Emitter(object): self.best_width = 80 if width and width > self.best_indent*2: self.best_width = width - self.best_line_break = u'\n' - if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: self.best_line_break = line_break # Tag prefixes. @@ -159,7 +159,7 @@ class Emitter(object): def expect_stream_start(self): if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not getattr(self.stream, 'encoding', None): + if self.event.encoding and not hasattr(self.stream, 'encoding'): self.encoding = self.event.encoding self.write_stream_start() self.state = self.expect_first_document_start @@ -178,15 +178,14 @@ class Emitter(object): def expect_document_start(self, first=False): if isinstance(self.event, DocumentStartEvent): if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() if self.event.version: version_text = self.prepare_version(self.event.version) self.write_version_directive(version_text) self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() if self.event.tags: - handles = self.event.tags.keys() - handles.sort() + handles = sorted(self.event.tags.keys()) for handle in handles: prefix = self.event.tags[handle] self.tag_prefixes[prefix] = handle @@ -198,13 +197,13 @@ class Emitter(object): and not self.check_empty_document()) if not implicit: self.write_indent() - self.write_indicator(u'---', True) + 
self.write_indicator('---', True) if self.canonical: self.write_indent() self.state = self.expect_document_root elif isinstance(self.event, StreamEndEvent): if self.open_ended: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() self.write_stream_end() self.state = self.expect_nothing @@ -216,7 +215,7 @@ class Emitter(object): if isinstance(self.event, DocumentEndEvent): self.write_indent() if self.event.explicit: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() self.flush_stream() self.state = self.expect_document_start @@ -239,7 +238,7 @@ class Emitter(object): if isinstance(self.event, AliasEvent): self.expect_alias() elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor(u'&') + self.process_anchor('&') self.process_tag() if isinstance(self.event, ScalarEvent): self.expect_scalar() @@ -261,7 +260,7 @@ class Emitter(object): def expect_alias(self): if self.event.anchor is None: raise EmitterError("anchor is not specified for alias") - self.process_anchor(u'*') + self.process_anchor('*') self.state = self.states.pop() def expect_scalar(self): @@ -273,7 +272,7 @@ class Emitter(object): # Flow sequence handlers. 
def expect_flow_sequence(self): - self.write_indicator(u'[', True, whitespace=True) + self.write_indicator('[', True, whitespace=True) self.flow_level += 1 self.increase_indent(flow=True) self.state = self.expect_first_flow_sequence_item @@ -282,7 +281,7 @@ class Emitter(object): if isinstance(self.event, SequenceEndEvent): self.indent = self.indents.pop() self.flow_level -= 1 - self.write_indicator(u']', False) + self.write_indicator(']', False) self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: @@ -295,12 +294,12 @@ class Emitter(object): self.indent = self.indents.pop() self.flow_level -= 1 if self.canonical: - self.write_indicator(u',', False) + self.write_indicator(',', False) self.write_indent() - self.write_indicator(u']', False) + self.write_indicator(']', False) self.state = self.states.pop() else: - self.write_indicator(u',', False) + self.write_indicator(',', False) if self.canonical or self.column > self.best_width: self.write_indent() self.states.append(self.expect_flow_sequence_item) @@ -309,7 +308,7 @@ class Emitter(object): # Flow mapping handlers. 
def expect_flow_mapping(self): - self.write_indicator(u'{', True, whitespace=True) + self.write_indicator('{', True, whitespace=True) self.flow_level += 1 self.increase_indent(flow=True) self.state = self.expect_first_flow_mapping_key @@ -318,7 +317,7 @@ class Emitter(object): if isinstance(self.event, MappingEndEvent): self.indent = self.indents.pop() self.flow_level -= 1 - self.write_indicator(u'}', False) + self.write_indicator('}', False) self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: @@ -327,7 +326,7 @@ class Emitter(object): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True) + self.write_indicator('?', True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) @@ -336,31 +335,31 @@ class Emitter(object): self.indent = self.indents.pop() self.flow_level -= 1 if self.canonical: - self.write_indicator(u',', False) + self.write_indicator(',', False) self.write_indent() - self.write_indicator(u'}', False) + self.write_indicator('}', False) self.state = self.states.pop() else: - self.write_indicator(u',', False) + self.write_indicator(',', False) if self.canonical or self.column > self.best_width: self.write_indent() if not self.canonical and self.check_simple_key(): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True) + self.write_indicator('?', True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) def expect_flow_mapping_simple_value(self): - self.write_indicator(u':', False) + self.write_indicator(':', False) self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) def expect_flow_mapping_value(self): if self.canonical or self.column > self.best_width: self.write_indent() - self.write_indicator(u':', True) + self.write_indicator(':', True) 
self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) @@ -380,7 +379,7 @@ class Emitter(object): self.state = self.states.pop() else: self.write_indent() - self.write_indicator(u'-', True, indention=True) + self.write_indicator('-', True, indention=True) self.states.append(self.expect_block_sequence_item) self.expect_node(sequence=True) @@ -403,18 +402,18 @@ class Emitter(object): self.states.append(self.expect_block_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True, indention=True) + self.write_indicator('?', True, indention=True) self.states.append(self.expect_block_mapping_value) self.expect_node(mapping=True) def expect_block_mapping_simple_value(self): - self.write_indicator(u':', False) + self.write_indicator(':', False) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) def expect_block_mapping_value(self): self.write_indent() - self.write_indicator(u':', True, indention=True) + self.write_indicator(':', True, indention=True) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) @@ -433,7 +432,7 @@ class Emitter(object): return False event = self.events[0] return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == u'') + and event.tag is None and event.implicit and event.value == '') def check_simple_key(self): length = 0 @@ -478,7 +477,7 @@ class Emitter(object): self.prepared_tag = None return if self.event.implicit[0] and tag is None: - tag = u'!' + tag = '!' 
self.prepared_tag = None else: if (not self.canonical or tag is None) and self.event.implicit: @@ -541,19 +540,18 @@ class Emitter(object): major, minor = version if major != 1: raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return u'%d.%d' % (major, minor) + return '%d.%d' % (major, minor) def prepare_tag_handle(self, handle): if not handle: raise EmitterError("tag handle must not be empty") - if handle[0] != u'!' or handle[-1] != u'!': - raise EmitterError("tag handle must start and end with '!': %r" - % (handle.encode('utf-8'))) + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) for ch in handle[1:-1]: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): raise EmitterError("invalid character %r in the tag handle: %r" - % (ch.encode('utf-8'), handle.encode('utf-8'))) + % (ch, handle)) return handle def prepare_tag_prefix(self, prefix): @@ -561,12 +559,12 @@ class Emitter(object): raise EmitterError("tag prefix must not be empty") chunks = [] start = end = 0 - if prefix[0] == u'!': + if prefix[0] == '!': end = 1 while end < len(prefix): ch = prefix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?!:@&=+$,_.~*\'()[]': + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': end += 1 else: if start < end: @@ -574,32 +572,31 @@ class Emitter(object): start = end = end+1 data = ch.encode('utf-8') for ch in data: - chunks.append(u'%%%02X' % ord(ch)) + chunks.append('%%%02X' % ord(ch)) if start < end: chunks.append(prefix[start:end]) - return u''.join(chunks) + return ''.join(chunks) def prepare_tag(self, tag): if not tag: raise EmitterError("tag must not be empty") - if tag == u'!': + if tag == '!': return tag handle = None suffix = tag - prefixes = 
self.tag_prefixes.keys() - prefixes.sort() + prefixes = sorted(self.tag_prefixes.keys()) for prefix in prefixes: if tag.startswith(prefix) \ - and (prefix == u'!' or len(prefix) < len(tag)): + and (prefix == '!' or len(prefix) < len(tag)): handle = self.tag_prefixes[prefix] suffix = tag[len(prefix):] chunks = [] start = end = 0 while end < len(suffix): ch = suffix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.~*\'()[]' \ - or (ch == u'!' and handle != u'!'): + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): end += 1 else: if start < end: @@ -607,23 +604,23 @@ class Emitter(object): start = end = end+1 data = ch.encode('utf-8') for ch in data: - chunks.append(u'%%%02X' % ord(ch)) + chunks.append('%%%02X' % ord(ch)) if start < end: chunks.append(suffix[start:end]) - suffix_text = u''.join(chunks) + suffix_text = ''.join(chunks) if handle: - return u'%s%s' % (handle, suffix_text) + return '%s%s' % (handle, suffix_text) else: - return u'!<%s>' % suffix_text + return '!<%s>' % suffix_text def prepare_anchor(self, anchor): if not anchor: raise EmitterError("anchor must not be empty") for ch in anchor: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): raise EmitterError("invalid character %r in the anchor: %r" - % (ch.encode('utf-8'), anchor.encode('utf-8'))) + % (ch, anchor)) return anchor def analyze_scalar(self, scalar): @@ -650,7 +647,7 @@ class Emitter(object): space_break = False # Check document indicators. - if scalar.startswith(u'---') or scalar.startswith(u'...'): + if scalar.startswith('---') or scalar.startswith('...'): block_indicators = True flow_indicators = True @@ -659,7 +656,7 @@ class Emitter(object): # Last character or followed by a whitespace. 
followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') # The previous character is a space. previous_space = False @@ -674,34 +671,34 @@ class Emitter(object): # Check for indicators. if index == 0: # Leading indicators are special characters. - if ch in u'#,[]{}&*!|>\'\"%@`': + if ch in '#,[]{}&*!|>\'\"%@`': flow_indicators = True block_indicators = True - if ch in u'?:': + if ch in '?:': flow_indicators = True if followed_by_whitespace: block_indicators = True - if ch == u'-' and followed_by_whitespace: + if ch == '-' and followed_by_whitespace: flow_indicators = True block_indicators = True else: # Some indicators cannot appear within a scalar as well. - if ch in u',?[]{}': + if ch in ',?[]{}': flow_indicators = True - if ch == u':': + if ch == ':': flow_indicators = True if followed_by_whitespace: block_indicators = True - if ch == u'#' and preceeded_by_whitespace: + if ch == '#' and preceeded_by_whitespace: flow_indicators = True block_indicators = True # Check for line breaks, special, and unicode characters. - if ch in u'\n\x85\u2028\u2029': + if ch in '\n\x85\u2028\u2029': line_breaks = True - if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': unicode_characters = True if not self.allow_unicode: special_characters = True @@ -709,7 +706,7 @@ class Emitter(object): special_characters = True # Detect important whitespace combinations. 
- if ch == u' ': + if ch == ' ': if index == 0: leading_space = True if index == len(scalar)-1: @@ -718,7 +715,7 @@ class Emitter(object): break_space = True previous_space = True previous_break = False - elif ch in u'\n\x85\u2028\u2029': + elif ch in '\n\x85\u2028\u2029': if index == 0: leading_break = True if index == len(scalar)-1: @@ -733,9 +730,9 @@ class Emitter(object): # Prepare for the next character. index += 1 - preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + preceeded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') # Let's decide what styles are allowed. allow_flow_plain = True @@ -794,7 +791,7 @@ class Emitter(object): def write_stream_start(self): # Write BOM if needed. if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write(u'\uFEFF'.encode(self.encoding)) + self.stream.write('\uFEFF'.encode(self.encoding)) def write_stream_end(self): self.flush_stream() @@ -804,7 +801,7 @@ class Emitter(object): if self.whitespace or not need_whitespace: data = indicator else: - data = u' '+indicator + data = ' '+indicator self.whitespace = whitespace self.indention = self.indention and indention self.column += len(data) @@ -820,7 +817,7 @@ class Emitter(object): self.write_line_break() if self.column < indent: self.whitespace = True - data = u' '*(indent-self.column) + data = ' '*(indent-self.column) self.column = indent if self.encoding: data = data.encode(self.encoding) @@ -838,14 +835,14 @@ class Emitter(object): self.stream.write(data) def write_version_directive(self, version_text): - data = u'%%YAML %s' % version_text + data = '%%YAML %s' % version_text if self.encoding: data = data.encode(self.encoding) self.stream.write(data) self.write_line_break() def write_tag_directive(self, handle_text, prefix_text): - data = u'%%TAG %s %s' % (handle_text, prefix_text) + data = 
'%%TAG %s %s' % (handle_text, prefix_text) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) @@ -854,7 +851,7 @@ class Emitter(object): # Scalar streams. def write_single_quoted(self, text, split=True): - self.write_indicator(u'\'', True) + self.write_indicator('\'', True) spaces = False breaks = False start = end = 0 @@ -863,7 +860,7 @@ class Emitter(object): if end < len(text): ch = text[end] if spaces: - if ch is None or ch != u' ': + if ch is None or ch != ' ': if start+1 == end and self.column > self.best_width and split \ and start != 0 and end != len(text): self.write_indent() @@ -875,18 +872,18 @@ class Emitter(object): self.stream.write(data) start = end elif breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': self.write_line_break() for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) self.write_indent() start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': if start < end: data = text[start:end] self.column += len(data) @@ -894,49 +891,49 @@ class Emitter(object): data = data.encode(self.encoding) self.stream.write(data) start = end - if ch == u'\'': - data = u'\'\'' + if ch == '\'': + data = '\'\'' self.column += 2 if self.encoding: data = data.encode(self.encoding) self.stream.write(data) start = end + 1 if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 - self.write_indicator(u'\'', False) + self.write_indicator('\'', False) ESCAPE_REPLACEMENTS = { - u'\0': u'0', - u'\x07': u'a', - u'\x08': u'b', - u'\x09': u't', - u'\x0A': u'n', - u'\x0B': u'v', - u'\x0C': u'f', - u'\x0D': u'r', - u'\x1B': u'e', - u'\"': u'\"', - u'\\': u'\\', - u'\x85': u'N', - u'\xA0': u'_', 
- u'\u2028': u'L', - u'\u2029': u'P', + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', } def write_double_quoted(self, text, split=True): - self.write_indicator(u'"', True) + self.write_indicator('"', True) start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] - if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ - or not (u'\x20' <= ch <= u'\x7E' + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' or (self.allow_unicode - and (u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD'))): + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): if start < end: data = text[start:end] self.column += len(data) @@ -946,21 +943,21 @@ class Emitter(object): start = end if ch is not None: if ch in self.ESCAPE_REPLACEMENTS: - data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= u'\xFF': - data = u'\\x%02X' % ord(ch) - elif ch <= u'\uFFFF': - data = u'\\u%04X' % ord(ch) + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) else: - data = u'\\U%08X' % ord(ch) + data = '\\U%08X' % ord(ch) self.column += len(data) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) start = end+1 - if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ and self.column+(end-start) > self.best_width and split: - data = text[start:end]+u'\\' + data = text[start:end]+'\\' if start < end: start = end self.column += len(data) @@ -970,30 +967,30 @@ class Emitter(object): self.write_indent() self.whitespace = False self.indention = False - if text[start] == u' ': - data = u'\\' + if text[start] == ' ': + data = '\\' self.column += len(data) if self.encoding: data = 
data.encode(self.encoding) self.stream.write(data) end += 1 - self.write_indicator(u'"', False) + self.write_indicator('"', False) def determine_block_hints(self, text): - hints = u'' + hints = '' if text: - if text[0] in u' \n\x85\u2028\u2029': - hints += unicode(self.best_indent) - if text[-1] not in u'\n\x85\u2028\u2029': - hints += u'-' - elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': - hints += u'+' + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' return hints def write_folded(self, text): hints = self.determine_block_hints(text) - self.write_indicator(u'>'+hints, True) - if hints[-1:] == u'+': + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': self.open_ended = True self.write_line_break() leading_space = True @@ -1005,13 +1002,13 @@ class Emitter(object): if end < len(text): ch = text[end] if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != u' ' \ - and text[start] == u'\n': + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': self.write_line_break() - leading_space = (ch == u' ') + leading_space = (ch == ' ') for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) @@ -1019,7 +1016,7 @@ class Emitter(object): self.write_indent() start = end elif spaces: - if ch != u' ': + if ch != ' ': if start+1 == end and self.column > self.best_width: self.write_indent() else: @@ -1030,7 +1027,7 @@ class Emitter(object): self.stream.write(data) start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029': + if ch is None or ch in ' \n\x85\u2028\u2029': data = text[start:end] self.column += len(data) if self.encoding: @@ -1040,14 +1037,14 @@ class Emitter(object): self.write_line_break() 
start = end if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - spaces = (ch == u' ') + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') end += 1 def write_literal(self, text): hints = self.determine_block_hints(text) - self.write_indicator(u'|'+hints, True) - if hints[-1:] == u'+': + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': self.open_ended = True self.write_line_break() breaks = True @@ -1057,9 +1054,9 @@ class Emitter(object): if end < len(text): ch = text[end] if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': + if ch is None or ch not in '\n\x85\u2028\u2029': for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) @@ -1067,7 +1064,7 @@ class Emitter(object): self.write_indent() start = end else: - if ch is None or ch in u'\n\x85\u2028\u2029': + if ch is None or ch in '\n\x85\u2028\u2029': data = text[start:end] if self.encoding: data = data.encode(self.encoding) @@ -1076,7 +1073,7 @@ class Emitter(object): self.write_line_break() start = end if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 def write_plain(self, text, split=True): @@ -1085,7 +1082,7 @@ class Emitter(object): if not text: return if not self.whitespace: - data = u' ' + data = ' ' self.column += len(data) if self.encoding: data = data.encode(self.encoding) @@ -1100,7 +1097,7 @@ class Emitter(object): if end < len(text): ch = text[end] if spaces: - if ch != u' ': + if ch != ' ': if start+1 == end and self.column > self.best_width and split: self.write_indent() self.whitespace = False @@ -1113,11 +1110,11 @@ class Emitter(object): self.stream.write(data) start = end elif breaks: - if ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': self.write_line_break() for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: 
self.write_line_break(br) @@ -1126,7 +1123,7 @@ class Emitter(object): self.indention = False start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029': + if ch is None or ch in ' \n\x85\u2028\u2029': data = text[start:end] self.column += len(data) if self.encoding: @@ -1134,7 +1131,7 @@ class Emitter(object): self.stream.write(data) start = end if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 diff --git a/libs/yaml/error.py b/libs/yaml/error.py index 577686db..b796b4dc 100644 --- a/libs/yaml/error.py +++ b/libs/yaml/error.py @@ -1,7 +1,7 @@ __all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] -class Mark(object): +class Mark: def __init__(self, name, index, line, column, buffer, pointer): self.name = name @@ -16,7 +16,7 @@ class Mark(object): return None head = '' start = self.pointer - while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': start -= 1 if self.pointer-start > max_length/2-1: head = ' ... ' @@ -24,13 +24,13 @@ class Mark(object): break tail = '' end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': end += 1 if end-self.pointer > max_length/2-1: tail = ' ... 
' end -= 5 break - snippet = self.buffer[start:end].encode('utf-8') + snippet = self.buffer[start:end] return ' '*indent + head + snippet + tail + '\n' \ + ' '*(indent+self.pointer-start+len(head)) + '^' diff --git a/libs/yaml/loader.py b/libs/yaml/loader.py index 293ff467..08c8f01b 100644 --- a/libs/yaml/loader.py +++ b/libs/yaml/loader.py @@ -1,12 +1,12 @@ __all__ = ['BaseLoader', 'SafeLoader', 'Loader'] -from reader import * -from scanner import * -from parser import * -from composer import * -from constructor import * -from resolver import * +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): diff --git a/libs/yaml/parser.py b/libs/yaml/parser.py index f9e3057f..13a5995d 100644 --- a/libs/yaml/parser.py +++ b/libs/yaml/parser.py @@ -61,21 +61,21 @@ __all__ = ['Parser', 'ParserError'] -from error import MarkedYAMLError -from tokens import * -from events import * -from scanner import * +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * class ParserError(MarkedYAMLError): pass -class Parser(object): +class Parser: # Since writing a recursive-descendant parser is a straightforward task, we # do not give many comments here. 
DEFAULT_TAGS = { - u'!': u'!', - u'!!': u'tag:yaml.org,2002:', + '!': '!', + '!!': 'tag:yaml.org,2002:', } def __init__(self): @@ -219,7 +219,7 @@ class Parser(object): self.tag_handles = {} while self.check_token(DirectiveToken): token = self.get_token() - if token.name == u'YAML': + if token.name == 'YAML': if self.yaml_version is not None: raise ParserError(None, None, "found duplicate YAML directive", token.start_mark) @@ -229,11 +229,11 @@ class Parser(object): "found incompatible YAML document (version 1.* is required)", token.start_mark) self.yaml_version = token.value - elif token.name == u'TAG': + elif token.name == 'TAG': handle, prefix = token.value if handle in self.tag_handles: raise ParserError(None, None, - "duplicate tag handle %r" % handle.encode('utf-8'), + "duplicate tag handle %r" % handle, token.start_mark) self.tag_handles[handle] = prefix if self.tag_handles: @@ -303,19 +303,19 @@ class Parser(object): if handle is not None: if handle not in self.tag_handles: raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle.encode('utf-8'), + "found undefined tag handle %r" % handle, tag_mark) tag = self.tag_handles[handle]+suffix else: tag = suffix - #if tag == u'!': + #if tag == '!': # raise ParserError("while parsing a node", start_mark, # "found non-specific tag '!'", tag_mark, # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") if start_mark is None: start_mark = end_mark = self.peek_token().start_mark event = None - implicit = (tag is None or tag == u'!') + implicit = (tag is None or tag == '!') if indentless_sequence and self.check_token(BlockEntryToken): end_mark = self.peek_token().end_mark event = SequenceStartEvent(anchor, tag, implicit, @@ -325,7 +325,7 @@ class Parser(object): if self.check_token(ScalarToken): token = self.get_token() end_mark = token.end_mark - if (token.plain and tag is None) or tag == u'!': + if (token.plain and tag is None) or tag == '!': 
implicit = (True, False) elif tag is None: implicit = (False, True) @@ -357,7 +357,7 @@ class Parser(object): elif anchor is not None or tag is not None: # Empty scalars are allowed even if a tag or an anchor is # specified. - event = ScalarEvent(anchor, tag, (implicit, False), u'', + event = ScalarEvent(anchor, tag, (implicit, False), '', start_mark, end_mark) self.state = self.states.pop() else: @@ -585,5 +585,5 @@ class Parser(object): return self.process_empty_scalar(self.peek_token().start_mark) def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), u'', mark, mark) + return ScalarEvent(None, None, (True, False), '', mark, mark) diff --git a/libs/yaml/reader.py b/libs/yaml/reader.py index 3249e6b9..f70e920f 100644 --- a/libs/yaml/reader.py +++ b/libs/yaml/reader.py @@ -17,7 +17,7 @@ __all__ = ['Reader', 'ReaderError'] -from error import YAMLError, Mark +from .error import YAMLError, Mark import codecs, re @@ -31,7 +31,7 @@ class ReaderError(YAMLError): self.reason = reason def __str__(self): - if isinstance(self.character, str): + if isinstance(self.character, bytes): return "'%s' codec can't decode byte #x%02x: %s\n" \ " in \"%s\", position %d" \ % (self.encoding, ord(self.character), self.reason, @@ -44,13 +44,13 @@ class ReaderError(YAMLError): class Reader(object): # Reader: - # - determines the data encoding and converts it to unicode, + # - determines the data encoding and converts it to a unicode string, # - checks if characters are in allowed range, # - adds '\0' to the end. # Reader accepts + # - a `bytes` object, # - a `str` object, - # - a `unicode` object, # - a file-like object with its `read` method returning `str`, # - a file-like object with its `read` method returning `unicode`. 
@@ -61,7 +61,7 @@ class Reader(object): self.stream = None self.stream_pointer = 0 self.eof = True - self.buffer = u'' + self.buffer = '' self.pointer = 0 self.raw_buffer = None self.raw_decode = None @@ -69,19 +69,19 @@ class Reader(object): self.index = 0 self.line = 0 self.column = 0 - if isinstance(stream, unicode): + if isinstance(stream, str): self.name = "<unicode string>" self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, str): - self.name = "<string>" + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "<byte string>" self.raw_buffer = stream self.determine_encoding() else: self.stream = stream self.name = getattr(stream, 'name', "<file>") self.eof = False - self.raw_buffer = '' + self.raw_buffer = None self.determine_encoding() def peek(self, index=0): @@ -103,11 +103,11 @@ class Reader(object): ch = self.buffer[self.pointer] self.pointer += 1 self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): self.line += 1 self.column = 0 - elif ch != u'\uFEFF': + elif ch != '\uFEFF': self.column += 1 length -= 1 @@ -120,9 +120,9 @@ class Reader(object): None, None) def determine_encoding(self): - while not self.eof and len(self.raw_buffer) < 2: + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): self.update_raw() - if not isinstance(self.raw_buffer, unicode): + if isinstance(self.raw_buffer, bytes): if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): self.raw_decode = codecs.utf_16_le_decode self.encoding = 'utf-16-le' @@ -134,7 +134,7 @@ class Reader(object): self.encoding = 'utf-8' self.update(1) - NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') def check_printable(self, data): match = self.NON_PRINTABLE.search(data) 
if match: @@ -155,8 +155,8 @@ class Reader(object): try: data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof) - except UnicodeDecodeError, exc: - character = exc.object[exc.start] + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] if self.stream is not None: position = self.stream_pointer-len(self.raw_buffer)+exc.start else: @@ -170,16 +170,18 @@ class Reader(object): self.buffer += data self.raw_buffer = self.raw_buffer[converted:] if self.eof: - self.buffer += u'\0' + self.buffer += '\0' self.raw_buffer = None break - def update_raw(self, size=1024): + def update_raw(self, size=4096): data = self.stream.read(size) - if data: - self.raw_buffer += data - self.stream_pointer += len(data) + if self.raw_buffer is None: + self.raw_buffer = data else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: self.eof = True #try: diff --git a/libs/yaml/representer.py b/libs/yaml/representer.py index 5f4fc70d..b9e65c51 100644 --- a/libs/yaml/representer.py +++ b/libs/yaml/representer.py @@ -2,17 +2,15 @@ __all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', 'RepresenterError'] -from error import * -from nodes import * +from .error import * +from .nodes import * -import datetime - -import sys, copy_reg, types +import datetime, sys, copyreg, types, base64, collections class RepresenterError(YAMLError): pass -class BaseRepresenter(object): +class BaseRepresenter: yaml_representers = {} yaml_multi_representers = {} @@ -31,12 +29,6 @@ class BaseRepresenter(object): self.object_keeper = [] self.alias_key = None - def get_classobj_bases(self, cls): - bases = [cls] - for base in cls.__bases__: - bases.extend(self.get_classobj_bases(base)) - return bases - def represent_data(self, data): if self.ignore_aliases(data): self.alias_key = None @@ -51,8 +43,6 @@ class BaseRepresenter(object): #self.represented_objects[alias_key] = None self.object_keeper.append(data) data_types = type(data).__mro__ - if type(data) 
is types.InstanceType: - data_types = self.get_classobj_bases(data.__class__)+list(data_types) if data_types[0] in self.yaml_representers: node = self.yaml_representers[data_types[0]](self, data) else: @@ -66,22 +56,22 @@ class BaseRepresenter(object): elif None in self.yaml_representers: node = self.yaml_representers[None](self, data) else: - node = ScalarNode(None, unicode(data)) + node = ScalarNode(None, str(data)) #if alias_key is not None: # self.represented_objects[alias_key] = node return node + @classmethod def add_representer(cls, data_type, representer): if not 'yaml_representers' in cls.__dict__: cls.yaml_representers = cls.yaml_representers.copy() cls.yaml_representers[data_type] = representer - add_representer = classmethod(add_representer) + @classmethod def add_multi_representer(cls, data_type, representer): if not 'yaml_multi_representers' in cls.__dict__: cls.yaml_multi_representers = cls.yaml_multi_representers.copy() cls.yaml_multi_representers[data_type] = representer - add_multi_representer = classmethod(add_multi_representer) def represent_scalar(self, tag, value, style=None): if style is None: @@ -116,8 +106,11 @@ class BaseRepresenter(object): self.represented_objects[self.alias_key] = node best_style = True if hasattr(mapping, 'items'): - mapping = mapping.items() - mapping.sort() + mapping = list(mapping.items()) + try: + mapping = sorted(mapping) + except TypeError: + pass for item_key, item_value in mapping: node_key = self.represent_data(item_key) node_value = self.represent_data(item_value) @@ -139,46 +132,35 @@ class BaseRepresenter(object): class SafeRepresenter(BaseRepresenter): def ignore_aliases(self, data): - if data in [None, ()]: + if data is None: return True - if isinstance(data, (str, unicode, bool, int, float)): + if isinstance(data, tuple) and data == (): + return True + if isinstance(data, (str, bytes, bool, int, float)): return True def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', 
- u'null') + return self.represent_scalar('tag:yaml.org,2002:null', 'null') def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) + return self.represent_scalar('tag:yaml.org,2002:str', data) - def represent_unicode(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') def represent_bool(self, data): if data: - value = u'true' + value = 'true' else: - value = u'false' - return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) def represent_int(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - def represent_long(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) inf_value = 1e300 while repr(inf_value) != repr(inf_value*inf_value): @@ -186,13 +168,13 @@ class SafeRepresenter(BaseRepresenter): def represent_float(self, data): if data != data or (data == 0.0 and data == 1.0): - value = u'.nan' + value = '.nan' elif data == self.inf_value: - value = u'.inf' + value = '.inf' elif data == -self.inf_value: - value = u'-.inf' + value = '-.inf' else: - value = unicode(repr(data)).lower() + value = repr(data).lower() # Note that in some cases `repr(data)` represents a float number # without the decimal parts. 
For instance: # >>> repr(1e17) @@ -200,9 +182,9 @@ class SafeRepresenter(BaseRepresenter): # Unfortunately, this is not a valid float representation according # to the definition of the `!!float` tag. We fix this by adding # '.0' before the 'e' symbol. - if u'.' not in value and u'e' in value: - value = value.replace(u'e', u'.0e', 1) - return self.represent_scalar(u'tag:yaml.org,2002:float', value) + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) def represent_list(self, data): #pairs = (len(data) > 0 and isinstance(data, list)) @@ -212,7 +194,7 @@ class SafeRepresenter(BaseRepresenter): # pairs = False # break #if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + return self.represent_sequence('tag:yaml.org,2002:seq', data) #value = [] #for item_key, item_value in data: # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', @@ -220,21 +202,21 @@ class SafeRepresenter(BaseRepresenter): #return SequenceNode(u'tag:yaml.org,2002:pairs', value) def represent_dict(self, data): - return self.represent_mapping(u'tag:yaml.org,2002:map', data) + return self.represent_mapping('tag:yaml.org,2002:map', data) def represent_set(self, data): value = {} for key in data: value[key] = None - return self.represent_mapping(u'tag:yaml.org,2002:set', value) + return self.represent_mapping('tag:yaml.org,2002:set', value) def represent_date(self, data): - value = unicode(data.isoformat()) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) def represent_datetime(self, data): - value = unicode(data.isoformat(' ')) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) def represent_yaml_object(self, tag, data, cls, flow_style=None): if 
hasattr(data, '__getstate__'): @@ -252,8 +234,8 @@ SafeRepresenter.add_representer(type(None), SafeRepresenter.add_representer(str, SafeRepresenter.represent_str) -SafeRepresenter.add_representer(unicode, - SafeRepresenter.represent_unicode) +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) SafeRepresenter.add_representer(bool, SafeRepresenter.represent_bool) @@ -261,9 +243,6 @@ SafeRepresenter.add_representer(bool, SafeRepresenter.add_representer(int, SafeRepresenter.represent_int) -SafeRepresenter.add_representer(long, - SafeRepresenter.represent_long) - SafeRepresenter.add_representer(float, SafeRepresenter.represent_float) @@ -290,99 +269,27 @@ SafeRepresenter.add_representer(None, class Representer(SafeRepresenter): - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:python/str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - tag = None - try: - data.encode('ascii') - tag = u'tag:yaml.org,2002:python/unicode' - except UnicodeEncodeError: - tag = u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data) - - def represent_long(self, data): - tag = u'tag:yaml.org,2002:int' - if int(data) is not data: - tag = u'tag:yaml.org,2002:python/long' - return self.represent_scalar(tag, unicode(data)) - def represent_complex(self, data): if data.imag == 0.0: - data = u'%r' % data.real + data = '%r' % data.real elif data.real == 0.0: - data = u'%rj' % data.imag + data = '%rj' % data.imag elif data.imag > 0: - data = u'%r+%rj' % (data.real, data.imag) + data = '%r+%rj' % (data.real, data.imag) else: - data = u'%r%rj' % (data.real, data.imag) - return self.represent_scalar(u'tag:yaml.org,2002:python/complex', 
data) + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) def represent_tuple(self, data): - return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) def represent_name(self, data): - name = u'%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') def represent_module(self, data): return self.represent_scalar( - u'tag:yaml.org,2002:python/module:'+data.__name__, u'') - - def represent_instance(self, data): - # For instances of classic classes, we use __getinitargs__ and - # __getstate__ to serialize the data. - - # If data.__getinitargs__ exists, the object must be reconstructed by - # calling cls(**args), where args is a tuple returned by - # __getinitargs__. Otherwise, the cls.__init__ method should never be - # called and the class instance is created by instantiating a trivial - # class and assigning to the instance's __class__ variable. - - # If data.__getstate__ exists, it returns the state of the object. - # Otherwise, the state of the object is data.__dict__. - - # We produce either a !!python/object or !!python/object/new node. - # If data.__getinitargs__ does not exist and state is a dictionary, we - # produce a !!python/object node . Otherwise we produce a - # !!python/object/new node. 
- - cls = data.__class__ - class_name = u'%s.%s' % (cls.__module__, cls.__name__) - args = None - state = None - if hasattr(data, '__getinitargs__'): - args = list(data.__getinitargs__()) - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__ - if args is None and isinstance(state, dict): - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+class_name, state) - if isinstance(state, dict) and not state: - return self.represent_sequence( - u'tag:yaml.org,2002:python/object/new:'+class_name, args) - value = {} - if args: - value['args'] = args - value['state'] = state - return self.represent_mapping( - u'tag:yaml.org,2002:python/object/new:'+class_name, value) + 'tag:yaml.org,2002:python/module:'+data.__name__, '') def represent_object(self, data): # We use __reduce__ API to save the data. data.__reduce__ returns @@ -402,8 +309,8 @@ class Representer(SafeRepresenter): # !!python/object/apply node. cls = type(data) - if cls in copy_reg.dispatch_table: - reduce = copy_reg.dispatch_table[cls](data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) elif hasattr(data, '__reduce_ex__'): reduce = data.__reduce_ex__(2) elif hasattr(data, '__reduce__'): @@ -422,16 +329,16 @@ class Representer(SafeRepresenter): if function.__name__ == '__newobj__': function = args[0] args = args[1:] - tag = u'tag:yaml.org,2002:python/object/new:' + tag = 'tag:yaml.org,2002:python/object/new:' newobj = True else: - tag = u'tag:yaml.org,2002:python/object/apply:' + tag = 'tag:yaml.org,2002:python/object/apply:' newobj = False - function_name = u'%s.%s' % (function.__module__, function.__name__) + function_name = '%s.%s' % (function.__module__, function.__name__) if not args and not listitems and not dictitems \ and isinstance(state, dict) and newobj: return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+function_name, state) + 'tag:yaml.org,2002:python/object:'+function_name, state) if not 
listitems and not dictitems \ and isinstance(state, dict) and not state: return self.represent_sequence(tag+function_name, args) @@ -446,14 +353,13 @@ class Representer(SafeRepresenter): value['dictitems'] = dictitems return self.represent_mapping(tag+function_name, value) -Representer.add_representer(str, - Representer.represent_str) - -Representer.add_representer(unicode, - Representer.represent_unicode) - -Representer.add_representer(long, - Representer.represent_long) + def represent_ordered_dict(self, data): + # Provide uniform representation across different Python versions. + data_type = type(data) + tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ + % (data_type.__module__, data_type.__name__) + items = [[key, value] for key, value in data.items()] + return self.represent_sequence(tag, [items]) Representer.add_representer(complex, Representer.represent_complex) @@ -464,8 +370,8 @@ Representer.add_representer(tuple, Representer.add_representer(type, Representer.represent_name) -Representer.add_representer(types.ClassType, - Representer.represent_name) +Representer.add_representer(collections.OrderedDict, + Representer.represent_ordered_dict) Representer.add_representer(types.FunctionType, Representer.represent_name) @@ -476,9 +382,6 @@ Representer.add_representer(types.BuiltinFunctionType, Representer.add_representer(types.ModuleType, Representer.represent_module) -Representer.add_multi_representer(types.InstanceType, - Representer.represent_instance) - Representer.add_multi_representer(object, Representer.represent_object) diff --git a/libs/yaml/resolver.py b/libs/yaml/resolver.py index 6b5ab875..02b82e73 100644 --- a/libs/yaml/resolver.py +++ b/libs/yaml/resolver.py @@ -1,19 +1,19 @@ __all__ = ['BaseResolver', 'Resolver'] -from error import * -from nodes import * +from .error import * +from .nodes import * import re class ResolverError(YAMLError): pass -class BaseResolver(object): +class BaseResolver: - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - 
DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' yaml_implicit_resolvers = {} yaml_path_resolvers = {} @@ -22,15 +22,19 @@ class BaseResolver(object): self.resolver_exact_paths = [] self.resolver_prefix_paths = [] + @classmethod def add_implicit_resolver(cls, tag, regexp, first): if not 'yaml_implicit_resolvers' in cls.__dict__: - cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + implicit_resolvers = {} + for key in cls.yaml_implicit_resolvers: + implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] + cls.yaml_implicit_resolvers = implicit_resolvers if first is None: first = [None] for ch in first: cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - add_implicit_resolver = classmethod(add_implicit_resolver) + @classmethod def add_path_resolver(cls, tag, path, kind=None): # Note: `add_path_resolver` is experimental. The API could be changed. 
# `new_path` is a pattern that is matched against the path from the @@ -66,10 +70,10 @@ class BaseResolver(object): elif node_check is dict: node_check = MappingNode elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, basestring) \ + and not isinstance(node_check, str) \ and node_check is not None: raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (basestring, int)) \ + if not isinstance(index_check, (str, int)) \ and index_check is not None: raise ResolverError("Invalid index checker: %s" % index_check) new_path.append((node_check, index_check)) @@ -83,7 +87,6 @@ class BaseResolver(object): and kind is not None: raise ResolverError("Invalid node kind: %s" % kind) cls.yaml_path_resolvers[tuple(new_path), kind] = tag - add_path_resolver = classmethod(add_path_resolver) def descend_resolver(self, current_node, current_index): if not self.yaml_path_resolvers: @@ -117,7 +120,7 @@ class BaseResolver(object): def check_resolver_prefix(self, depth, path, kind, current_node, current_index): node_check, index_check = path[depth-1] - if isinstance(node_check, basestring): + if isinstance(node_check, str): if current_node.tag != node_check: return elif node_check is not None: @@ -128,7 +131,7 @@ class BaseResolver(object): if (index_check is False or index_check is None) \ and current_index is None: return - if isinstance(index_check, basestring): + if isinstance(index_check, str): if not (isinstance(current_index, ScalarNode) and index_check == current_index.value): return @@ -139,8 +142,8 @@ class BaseResolver(object): def resolve(self, kind, value, implicit): if kind is ScalarNode and implicit[0]: - if value == u'': - resolvers = self.yaml_implicit_resolvers.get(u'', []) + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) else: resolvers = self.yaml_implicit_resolvers.get(value[0], []) resolvers += self.yaml_implicit_resolvers.get(None, []) @@ -165,60 +168,60 @@ 
class Resolver(BaseResolver): pass Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:bool', - re.compile(ur'''^(?:yes|Yes|YES|no|No|NO + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO |true|True|TRUE|false|False|FALSE |on|On|ON|off|Off|OFF)$''', re.X), - list(u'yYnNtTfFoO')) + list('yYnNtTfFoO')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:float', - re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? |\.[0-9_]+(?:[eE][-+][0-9]+)? |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* |[-+]?\.(?:inf|Inf|INF) |\.(?:nan|NaN|NAN))$''', re.X), - list(u'-+0123456789.')) + list('-+0123456789.')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:int', - re.compile(ur'''^(?:[-+]?0b[0-1_]+ + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ |[-+]?0[0-7_]+ |[-+]?(?:0|[1-9][0-9_]*) |[-+]?0x[0-9a-fA-F_]+ |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list(u'-+0123456789')) + list('-+0123456789')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:merge', - re.compile(ur'^(?:<<)$'), - [u'<']) + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:null', - re.compile(ur'''^(?: ~ + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ |null|Null|NULL | )$''', re.X), - [u'~', u'n', u'N', u'']) + ['~', 'n', 'N', '']) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:timestamp', - re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? (?:[Tt]|[ \t]+)[0-9][0-9]? :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? 
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list(u'0123456789')) + list('0123456789')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:value', - re.compile(ur'^(?:=)$'), - [u'=']) + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) # The following resolver is only for documentation purposes. It cannot work # because plain scalars cannot start with '!', '&', or '*'. Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:yaml', - re.compile(ur'^(?:!|&|\*)$'), - list(u'!&*')) + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) diff --git a/libs/yaml/scanner.py b/libs/yaml/scanner.py index 5228fad6..c8d127b8 100644 --- a/libs/yaml/scanner.py +++ b/libs/yaml/scanner.py @@ -26,13 +26,13 @@ __all__ = ['Scanner', 'ScannerError'] -from error import MarkedYAMLError -from tokens import * +from .error import MarkedYAMLError +from .tokens import * class ScannerError(MarkedYAMLError): pass -class SimpleKey(object): +class SimpleKey: # See below simple keys treatment. def __init__(self, token_number, required, index, line, column, mark): @@ -43,7 +43,7 @@ class SimpleKey(object): self.column = column self.mark = mark -class Scanner(object): +class Scanner: def __init__(self): """Initialize the scanner.""" @@ -166,85 +166,85 @@ class Scanner(object): ch = self.peek() # Is it the end of stream? - if ch == u'\0': + if ch == '\0': return self.fetch_stream_end() # Is it a directive? - if ch == u'%' and self.check_directive(): + if ch == '%' and self.check_directive(): return self.fetch_directive() # Is it the document start? - if ch == u'-' and self.check_document_start(): + if ch == '-' and self.check_document_start(): return self.fetch_document_start() # Is it the document end? - if ch == u'.' and self.check_document_end(): + if ch == '.' and self.check_document_end(): return self.fetch_document_end() # TODO: support for BOM within a stream. 
- #if ch == u'\uFEFF': + #if ch == '\uFEFF': # return self.fetch_bom() <-- issue BOMToken # Note: the order of the following checks is NOT significant. # Is it the flow sequence start indicator? - if ch == u'[': + if ch == '[': return self.fetch_flow_sequence_start() # Is it the flow mapping start indicator? - if ch == u'{': + if ch == '{': return self.fetch_flow_mapping_start() # Is it the flow sequence end indicator? - if ch == u']': + if ch == ']': return self.fetch_flow_sequence_end() # Is it the flow mapping end indicator? - if ch == u'}': + if ch == '}': return self.fetch_flow_mapping_end() # Is it the flow entry indicator? - if ch == u',': + if ch == ',': return self.fetch_flow_entry() # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): + if ch == '-' and self.check_block_entry(): return self.fetch_block_entry() # Is it the key indicator? - if ch == u'?' and self.check_key(): + if ch == '?' and self.check_key(): return self.fetch_key() # Is it the value indicator? - if ch == u':' and self.check_value(): + if ch == ':' and self.check_value(): return self.fetch_value() # Is it an alias? - if ch == u'*': + if ch == '*': return self.fetch_alias() # Is it an anchor? - if ch == u'&': + if ch == '&': return self.fetch_anchor() # Is it a tag? - if ch == u'!': + if ch == '!': return self.fetch_tag() # Is it a literal scalar? - if ch == u'|' and not self.flow_level: + if ch == '|' and not self.flow_level: return self.fetch_literal() # Is it a folded scalar? - if ch == u'>' and not self.flow_level: + if ch == '>' and not self.flow_level: return self.fetch_folded() # Is it a single quoted scalar? - if ch == u'\'': + if ch == '\'': return self.fetch_single() # Is it a double quoted scalar? - if ch == u'\"': + if ch == '\"': return self.fetch_double() # It must be a plain scalar then. @@ -253,8 +253,8 @@ class Scanner(object): # No? It's an error. Let's produce a nice error message. 
raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % ch.encode('utf-8'), self.get_mark()) + "found character %r that cannot start any token" % ch, + self.get_mark()) # Simple keys treatment. @@ -280,13 +280,13 @@ class Scanner(object): # - should be no longer than 1024 characters. # Disabling this procedure will allow simple keys of any length and # height (may cause problems if indentation is broken though). - for level in self.possible_simple_keys.keys(): + for level in list(self.possible_simple_keys): key = self.possible_simple_keys[level] if key.line != self.line \ or self.index-key.index > 1024: if key.required: raise ScannerError("while scanning a simple key", key.mark, - "could not found expected ':'", self.get_mark()) + "could not find expected ':'", self.get_mark()) del self.possible_simple_keys[level] def save_possible_simple_key(self): @@ -297,10 +297,6 @@ class Scanner(object): # Check if a simple key is required at the current position. required = not self.flow_level and self.indent == self.column - # A simple key is required only if it is the first token in the current - # line. Therefore it is always allowed. - assert self.allow_simple_key or not required - # The next token might be a simple key. Let's save it's number and # position. if self.allow_simple_key: @@ -317,7 +313,7 @@ class Scanner(object): if key.required: raise ScannerError("while scanning a simple key", key.mark, - "could not found expected ':'", self.get_mark()) + "could not find expected ':'", self.get_mark()) del self.possible_simple_keys[self.flow_level] @@ -692,22 +688,22 @@ class Scanner(object): # DOCUMENT-START: ^ '---' (' '|'\n') if self.column == 0: - if self.prefix(3) == u'---' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return True def check_document_end(self): # DOCUMENT-END: ^ '...' 
(' '|'\n') if self.column == 0: - if self.prefix(3) == u'...' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return True def check_block_entry(self): # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_key(self): @@ -717,7 +713,7 @@ class Scanner(object): # KEY(block context): '?' (' '|'\n') else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_value(self): @@ -727,7 +723,7 @@ class Scanner(object): # VALUE(block context): ':' (' '|'\n') else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_plain(self): @@ -744,9 +740,9 @@ class Scanner(object): # '-' character) because we want the flow context to be space # independent. ch = self.peek() - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' - and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) # Scanners. @@ -770,14 +766,14 @@ class Scanner(object): # `unwind_indent` before issuing BLOCK-END. # Scanners for block, flow, and plain scalars need to be modified. 
- if self.index == 0 and self.peek() == u'\uFEFF': + if self.index == 0 and self.peek() == '\uFEFF': self.forward() found = False while not found: - while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() if self.scan_line_break(): if not self.flow_level: @@ -791,15 +787,15 @@ class Scanner(object): self.forward() name = self.scan_directive_name(start_mark) value = None - if name == u'YAML': + if name == 'YAML': value = self.scan_yaml_directive_value(start_mark) end_mark = self.get_mark() - elif name == u'TAG': + elif name == 'TAG': value = self.scan_tag_directive_value(start_mark) end_mark = self.get_mark() else: end_mark = self.get_mark() - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() self.scan_directive_ignored_line(start_mark) return DirectiveToken(name, value, start_mark, end_mark) @@ -808,51 +804,48 @@ class Scanner(object): # See the specification for details. 
length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) if not length: raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) value = self.prefix(length) self.forward(length) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) return value def scan_yaml_directive_value(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() major = self.scan_yaml_directive_number(start_mark) if self.peek() != '.': raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % self.peek().encode('utf-8'), + "expected a digit or '.', but found %r" % self.peek(), self.get_mark()) self.forward() minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + if self.peek() not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % self.peek().encode('utf-8'), + "expected a digit or ' ', but found %r" % self.peek(), self.get_mark()) return (major, minor) def scan_yaml_directive_number(self, start_mark): # See the specification for details. 
ch = self.peek() - if not (u'0' <= ch <= u'9'): + if not ('0' <= ch <= '9'): raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected a digit, but found %r" % ch, self.get_mark()) length = 0 - while u'0' <= self.peek(length) <= u'9': + while '0' <= self.peek(length) <= '9': length += 1 value = int(self.prefix(length)) self.forward(length) @@ -860,10 +853,10 @@ class Scanner(object): def scan_tag_directive_value(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == u' ': + while self.peek() == ' ': self.forward() prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix) @@ -872,34 +865,32 @@ class Scanner(object): # See the specification for details. value = self.scan_tag_handle('directive', start_mark) ch = self.peek() - if ch != u' ': + if ch != ' ': raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) return value def scan_tag_directive_prefix(self, start_mark): # See the specification for details. value = self.scan_tag_uri('directive', start_mark) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) return value def scan_directive_ignored_line(self, start_mark): # See the specification for details. 
- while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': + if ch not in '\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) self.scan_line_break() def scan_anchor(self, TokenClass): @@ -913,28 +904,28 @@ class Scanner(object): # Therefore we restrict aliases to numbers and ASCII letters. start_mark = self.get_mark() indicator = self.peek() - if indicator == u'*': + if indicator == '*': name = 'alias' else: name = 'anchor' self.forward() length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) if not length: raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) value = self.prefix(length) self.forward(length) ch = self.peek() - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) end_mark = self.get_mark() return TokenClass(value, start_mark, end_mark) @@ -942,40 +933,39 @@ class Scanner(object): # See the specification for details. 
start_mark = self.get_mark() ch = self.peek(1) - if ch == u'<': + if ch == '<': handle = None self.forward(2) suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != u'>': + if self.peek() != '>': raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek().encode('utf-8'), + "expected '>', but found %r" % self.peek(), self.get_mark()) self.forward() - elif ch in u'\0 \t\r\n\x85\u2028\u2029': + elif ch in '\0 \t\r\n\x85\u2028\u2029': handle = None - suffix = u'!' + suffix = '!' self.forward() else: length = 1 use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': use_handle = True break length += 1 ch = self.peek(length) - handle = u'!' + handle = '!' if use_handle: handle = self.scan_tag_handle('tag', start_mark) else: - handle = u'!' + handle = '!' self.forward() suffix = self.scan_tag_uri('tag', start_mark) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) value = (handle, suffix) end_mark = self.get_mark() return TagToken(value, start_mark, end_mark) @@ -1006,39 +996,39 @@ class Scanner(object): else: indent = min_indent+increment-1 breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' + line_break = '' # Scan the inner part of the block scalar. 
- while self.column == indent and self.peek() != u'\0': + while self.column == indent and self.peek() != '\0': chunks.extend(breaks) - leading_non_space = self.peek() not in u' \t' + leading_non_space = self.peek() not in ' \t' length = 0 - while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': length += 1 chunks.append(self.prefix(length)) self.forward(length) line_break = self.scan_line_break() breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != u'\0': + if self.column == indent and self.peek() != '\0': # Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification: - if folded and line_break == u'\n' \ - and leading_non_space and self.peek() not in u' \t': + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': if not breaks: - chunks.append(u' ') + chunks.append(' ') else: chunks.append(line_break) # This is Clark Evans's interpretation (also in the spec # examples): # - #if folded and line_break == u'\n': + #if folded and line_break == '\n': # if not breaks: # if self.peek() not in ' \t': - # chunks.append(u' ') + # chunks.append(' ') # else: # chunks.append(line_break) #else: @@ -1053,7 +1043,7 @@ class Scanner(object): chunks.extend(breaks) # We are done. 
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + return ScalarToken(''.join(chunks), False, start_mark, end_mark, style) def scan_block_scalar_indicators(self, start_mark): @@ -1061,21 +1051,21 @@ class Scanner(object): chomping = None increment = None ch = self.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.forward() ch = self.peek() - if ch in u'0123456789': + if ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", self.get_mark()) self.forward() - elif ch in u'0123456789': + elif ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, @@ -1083,31 +1073,31 @@ class Scanner(object): self.get_mark()) self.forward() ch = self.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.forward() ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) return chomping, increment def scan_block_scalar_ignored_line(self, start_mark): # See the specification for details. 
- while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': + if ch not in '\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) self.scan_line_break() def scan_block_scalar_indentation(self): @@ -1115,8 +1105,8 @@ class Scanner(object): chunks = [] max_indent = 0 end_mark = self.get_mark() - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() != u' ': + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': chunks.append(self.scan_line_break()) end_mark = self.get_mark() else: @@ -1129,12 +1119,12 @@ class Scanner(object): # See the specification for details. 
chunks = [] end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': + while self.column < indent and self.peek() == ' ': self.forward() - while self.peek() in u'\r\n\x85\u2028\u2029': + while self.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': + while self.column < indent and self.peek() == ' ': self.forward() return chunks, end_mark @@ -1159,33 +1149,33 @@ class Scanner(object): chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) self.forward() end_mark = self.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + return ScalarToken(''.join(chunks), False, start_mark, end_mark, style) ESCAPE_REPLACEMENTS = { - u'0': u'\0', - u'a': u'\x07', - u'b': u'\x08', - u't': u'\x09', - u'\t': u'\x09', - u'n': u'\x0A', - u'v': u'\x0B', - u'f': u'\x0C', - u'r': u'\x0D', - u'e': u'\x1B', - u' ': u'\x20', - u'\"': u'\"', - u'\\': u'\\', - u'N': u'\x85', - u'_': u'\xA0', - u'L': u'\u2028', - u'P': u'\u2029', + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', } ESCAPE_CODES = { - u'x': 2, - u'u': 4, - u'U': 8, + 'x': 2, + 'u': 4, + 'U': 8, } def scan_flow_scalar_non_spaces(self, double, start_mark): @@ -1193,19 +1183,19 @@ class Scanner(object): chunks = [] while True: length = 0 - while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': length += 1 if length: chunks.append(self.prefix(length)) self.forward(length) ch = self.peek() - if not double and ch == u'\'' and self.peek(1) == u'\'': - chunks.append(u'\'') + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') self.forward(2) - elif (double and ch == 
u'\'') or (not double and ch in u'\"\\'): + elif (double and ch == '\'') or (not double and ch in '\"\\'): chunks.append(ch) self.forward() - elif double and ch == u'\\': + elif double and ch == '\\': self.forward() ch = self.peek() if ch in self.ESCAPE_REPLACEMENTS: @@ -1215,19 +1205,19 @@ class Scanner(object): length = self.ESCAPE_CODES[ch] self.forward() for k in range(length): - if self.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError("while scanning a double-quoted scalar", start_mark, "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k).encode('utf-8')), self.get_mark()) + (length, self.peek(k)), self.get_mark()) code = int(self.prefix(length), 16) - chunks.append(unichr(code)) + chunks.append(chr(code)) self.forward(length) - elif ch in u'\r\n\x85\u2028\u2029': + elif ch in '\r\n\x85\u2028\u2029': self.scan_line_break() chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) else: raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + "found unknown escape character %r" % ch, self.get_mark()) else: return chunks @@ -1235,21 +1225,21 @@ class Scanner(object): # See the specification for details. 
chunks = [] length = 0 - while self.peek(length) in u' \t': + while self.peek(length) in ' \t': length += 1 whitespaces = self.prefix(length) self.forward(length) ch = self.peek() - if ch == u'\0': + if ch == '\0': raise ScannerError("while scanning a quoted scalar", start_mark, "found unexpected end of stream", self.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': + elif ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: - chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) else: chunks.append(whitespaces) @@ -1262,13 +1252,13 @@ class Scanner(object): # Instead of checking indentation, we check for document # separators. prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': raise ScannerError("while scanning a quoted scalar", start_mark, "found unexpected document separator", self.get_mark()) - while self.peek() in u' \t': + while self.peek() in ' \t': self.forward() - if self.peek() in u'\r\n\x85\u2028\u2029': + if self.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) else: return chunks @@ -1290,19 +1280,19 @@ class Scanner(object): spaces = [] while True: length = 0 - if self.peek() == u'#': + if self.peek() == '#': break while True: ch = self.peek(length) - if ch in u'\0 \t\r\n\x85\u2028\u2029' \ - or (not self.flow_level and ch == u':' and - self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ - or (self.flow_level and ch in u',:?[]{}'): + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in ',:?[]{}'): break length += 1 # It's not clear what we should do with ':' in the flow 
context. - if (self.flow_level and ch == u':' - and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + if (self.flow_level and ch == ':' + and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'): self.forward(length) raise ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", self.get_mark(), @@ -1315,10 +1305,10 @@ class Scanner(object): self.forward(length) end_mark = self.get_mark() spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == u'#' \ + if not spaces or self.peek() == '#' \ or (not self.flow_level and self.column < indent): break - return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + return ScalarToken(''.join(chunks), True, start_mark, end_mark) def scan_plain_spaces(self, indent, start_mark): # See the specification for details. @@ -1326,32 +1316,32 @@ class Scanner(object): # We just forbid them completely. Do not use tabs in YAML! chunks = [] length = 0 - while self.peek(length) in u' ': + while self.peek(length) in ' ': length += 1 whitespaces = self.prefix(length) self.forward(length) ch = self.peek() - if ch in u'\r\n\x85\u2028\u2029': + if ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() self.allow_simple_key = True prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return breaks = [] - while self.peek() in u' \r\n\x85\u2028\u2029': + while self.peek() in ' \r\n\x85\u2028\u2029': if self.peek() == ' ': self.forward() else: breaks.append(self.scan_line_break()) prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: 
- chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) elif whitespaces: chunks.append(whitespaces) @@ -1362,22 +1352,20 @@ class Scanner(object): # For some strange reasons, the specification does not allow '_' in # tag handles. I have allowed it anyway. ch = self.peek() - if ch != u'!': + if ch != '!': raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected '!', but found %r" % ch, self.get_mark()) length = 1 ch = self.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) - if ch != u'!': + if ch != '!': self.forward(length) raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected '!', but found %r" % ch, self.get_mark()) length += 1 value = self.prefix(length) self.forward(length) @@ -1389,9 +1377,9 @@ class Scanner(object): chunks = [] length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': chunks.append(self.prefix(length)) self.forward(length) length = 0 @@ -1405,26 +1393,25 @@ class Scanner(object): length = 0 if not chunks: raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch.encode('utf-8'), - self.get_mark()) - return u''.join(chunks) + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) def scan_uri_escapes(self, name, start_mark): # See the specification for details. 
- bytes = [] + codes = [] mark = self.get_mark() - while self.peek() == u'%': + while self.peek() == '%': self.forward() for k in range(2): - if self.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % - (self.peek(k).encode('utf-8')), self.get_mark()) - bytes.append(chr(int(self.prefix(2), 16))) + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) self.forward(2) try: - value = unicode(''.join(bytes), 'utf-8') - except UnicodeDecodeError, exc: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) return value @@ -1438,16 +1425,16 @@ class Scanner(object): # '\u2029 : '\u2029' # default : '' ch = self.peek() - if ch in u'\r\n\x85': - if self.prefix(2) == u'\r\n': + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': self.forward(2) else: self.forward() - return u'\n' - elif ch in u'\u2028\u2029': + return '\n' + elif ch in '\u2028\u2029': self.forward() return ch - return u'' + return '' #try: # import psyco diff --git a/libs/yaml/serializer.py b/libs/yaml/serializer.py index 0bf1e96d..fe911e67 100644 --- a/libs/yaml/serializer.py +++ b/libs/yaml/serializer.py @@ -1,16 +1,16 @@ __all__ = ['Serializer', 'SerializerError'] -from error import YAMLError -from events import * -from nodes import * +from .error import YAMLError +from .events import * +from .nodes import * class SerializerError(YAMLError): pass -class Serializer(object): +class Serializer: - ANCHOR_TEMPLATE = u'id%03d' + ANCHOR_TEMPLATE = 'id%03d' def __init__(self, encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None):