diff --git a/.gitignore b/.gitignore index 9c80798b..3efe07d4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ *.cfg !.bumpversion.cfg *.cfg.old -*.pyc -*.pyo +*.py[cod] *.log *.pid *.db diff --git a/libs/common/_yaml.cp37-win32.pyd b/libs/common/_yaml.cp37-win32.pyd deleted file mode 100644 index fdfc7112..00000000 Binary files a/libs/common/_yaml.cp37-win32.pyd and /dev/null differ diff --git a/libs/common/_yaml/__init__.py b/libs/common/_yaml/__init__.py new file mode 100644 index 00000000..7baa8c4b --- /dev/null +++ b/libs/common/_yaml/__init__.py @@ -0,0 +1,33 @@ +# This is a stub package designed to roughly emulate the _yaml +# extension module, which previously existed as a standalone module +# and has been moved into the `yaml` package namespace. +# It does not perfectly mimic its old counterpart, but should get +# close enough for anyone who's relying on it even when they shouldn't. +import yaml + +# in some circumstances, the yaml module we imported may be from a different version, so we need +# to tread carefully when poking at it here (it may not have the attributes we expect) +if not getattr(yaml, '__with_libyaml__', False): + from sys import version_info + + exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError + raise exc("No module named '_yaml'") +else: + from yaml._yaml import * + import warnings + warnings.warn( + 'The _yaml extension module is now located at yaml._yaml' + ' and its location is subject to change. To use the' + ' LibYAML-based parser and emitter, import from `yaml`:' + ' `from yaml import CLoader as Loader, CDumper as Dumper`.', + DeprecationWarning + ) + del warnings + # Don't `del yaml` here because yaml is actually an existing + # namespace member of _yaml. + +__name__ = '_yaml' +# If the module is top-level (i.e. not a part of any specific package) +# then the attribute should be set to ''. 
+# https://docs.python.org/3.8/library/types.html +__package__ = '' diff --git a/libs/common/appdirs.py b/libs/common/appdirs.py index ae67001a..2acd1deb 100644 --- a/libs/common/appdirs.py +++ b/libs/common/appdirs.py @@ -13,8 +13,8 @@ See for details and usage. # - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html # - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html -__version_info__ = (1, 4, 3) -__version__ = '.'.join(map(str, __version_info__)) +__version__ = "1.4.4" +__version_info__ = tuple(int(segment) for segment in __version__.split(".")) import sys diff --git a/libs/common/beets/__init__.py b/libs/common/beets/__init__.py index b8fe2a84..9642a6f3 100644 --- a/libs/common/beets/__init__.py +++ b/libs/common/beets/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,30 +12,29 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function -import os +import confuse +from sys import stderr -from beets.util import confit - -__version__ = u'1.4.7' -__author__ = u'Adrian Sampson ' +__version__ = '1.6.0' +__author__ = 'Adrian Sampson ' -class IncludeLazyConfig(confit.LazyConfig): - """A version of Confit's LazyConfig that also merges in data from +class IncludeLazyConfig(confuse.LazyConfig): + """A version of Confuse's LazyConfig that also merges in data from YAML files specified in an `include` setting. 
""" def read(self, user=True, defaults=True): - super(IncludeLazyConfig, self).read(user, defaults) + super().read(user, defaults) try: for view in self['include']: - filename = view.as_filename() - if os.path.isfile(filename): - self.set_file(filename) - except confit.NotFoundError: + self.set_file(view.as_filename()) + except confuse.NotFoundError: pass + except confuse.ConfigReadError as err: + stderr.write("configuration `import` failed: {}" + .format(err.reason)) config = IncludeLazyConfig('beets', __name__) diff --git a/libs/common/beets/__main__.py b/libs/common/beets/__main__.py index 8010ca0d..ac829de9 100644 --- a/libs/common/beets/__main__.py +++ b/libs/common/beets/__main__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2017, Adrian Sampson. # @@ -17,7 +16,6 @@ `python -m beets`. """ -from __future__ import division, absolute_import, print_function import sys from .ui import main diff --git a/libs/common/beets/art.py b/libs/common/beets/art.py index 979a6f72..13d5dfbd 100644 --- a/libs/common/beets/art.py +++ b/libs/common/beets/art.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ music and items' embedded album art. 
""" -from __future__ import division, absolute_import, print_function import subprocess import platform @@ -26,7 +24,7 @@ import os from beets.util import displayable_path, syspath, bytestring_path from beets.util.artresizer import ArtResizer -from beets import mediafile +import mediafile def mediafile_image(image_path, maxwidth=None): @@ -43,7 +41,7 @@ def get_art(log, item): try: mf = mediafile.MediaFile(syspath(item.path)) except mediafile.UnreadableFileError as exc: - log.warning(u'Could not extract art from {0}: {1}', + log.warning('Could not extract art from {0}: {1}', displayable_path(item.path), exc) return @@ -51,26 +49,27 @@ def get_art(log, item): def embed_item(log, item, imagepath, maxwidth=None, itempath=None, - compare_threshold=0, ifempty=False, as_album=False): + compare_threshold=0, ifempty=False, as_album=False, id3v23=None, + quality=0): """Embed an image into the item's media file. """ # Conditions and filters. if compare_threshold: if not check_art_similarity(log, item, imagepath, compare_threshold): - log.info(u'Image not similar; skipping.') + log.info('Image not similar; skipping.') return if ifempty and get_art(log, item): - log.info(u'media file already contained art') - return + log.info('media file already contained art') + return if maxwidth and not as_album: - imagepath = resize_image(log, imagepath, maxwidth) + imagepath = resize_image(log, imagepath, maxwidth, quality) # Get the `Image` object from the file. 
try: - log.debug(u'embedding {0}', displayable_path(imagepath)) + log.debug('embedding {0}', displayable_path(imagepath)) image = mediafile_image(imagepath, maxwidth) - except IOError as exc: - log.warning(u'could not read image file: {0}', exc) + except OSError as exc: + log.warning('could not read image file: {0}', exc) return # Make sure the image kind is safe (some formats only support PNG @@ -80,36 +79,39 @@ def embed_item(log, item, imagepath, maxwidth=None, itempath=None, image.mime_type) return - item.try_write(path=itempath, tags={'images': [image]}) + item.try_write(path=itempath, tags={'images': [image]}, id3v23=id3v23) -def embed_album(log, album, maxwidth=None, quiet=False, - compare_threshold=0, ifempty=False): +def embed_album(log, album, maxwidth=None, quiet=False, compare_threshold=0, + ifempty=False, quality=0): """Embed album art into all of the album's items. """ imagepath = album.artpath if not imagepath: - log.info(u'No album art present for {0}', album) + log.info('No album art present for {0}', album) return if not os.path.isfile(syspath(imagepath)): - log.info(u'Album art not found at {0} for {1}', + log.info('Album art not found at {0} for {1}', displayable_path(imagepath), album) return if maxwidth: - imagepath = resize_image(log, imagepath, maxwidth) + imagepath = resize_image(log, imagepath, maxwidth, quality) - log.info(u'Embedding album art into {0}', album) + log.info('Embedding album art into {0}', album) for item in album.items(): - embed_item(log, item, imagepath, maxwidth, None, - compare_threshold, ifempty, as_album=True) + embed_item(log, item, imagepath, maxwidth, None, compare_threshold, + ifempty, as_album=True, quality=quality) -def resize_image(log, imagepath, maxwidth): - """Returns path to an image resized to maxwidth. +def resize_image(log, imagepath, maxwidth, quality): + """Returns path to an image resized to maxwidth and encoded with the + specified quality level. 
""" - log.debug(u'Resizing album art to {0} pixels wide', maxwidth) - imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) + log.debug('Resizing album art to {0} pixels wide and encoding at quality \ + level {1}', maxwidth, quality) + imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath), + quality=quality) return imagepath @@ -131,7 +133,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): syspath(art, prefix=False), '-colorspace', 'gray', 'MIFF:-'] compare_cmd = ['compare', '-metric', 'PHASH', '-', 'null:'] - log.debug(u'comparing images with pipeline {} | {}', + log.debug('comparing images with pipeline {} | {}', convert_cmd, compare_cmd) convert_proc = subprocess.Popen( convert_cmd, @@ -155,7 +157,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): convert_proc.wait() if convert_proc.returncode: log.debug( - u'ImageMagick convert failed with status {}: {!r}', + 'ImageMagick convert failed with status {}: {!r}', convert_proc.returncode, convert_stderr, ) @@ -165,7 +167,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): stdout, stderr = compare_proc.communicate() if compare_proc.returncode: if compare_proc.returncode != 1: - log.debug(u'ImageMagick compare failed: {0}, {1}', + log.debug('ImageMagick compare failed: {0}, {1}', displayable_path(imagepath), displayable_path(art)) return @@ -176,10 +178,10 @@ def check_art_similarity(log, item, imagepath, compare_threshold): try: phash_diff = float(out_str) except ValueError: - log.debug(u'IM output is not a number: {0!r}', out_str) + log.debug('IM output is not a number: {0!r}', out_str) return - log.debug(u'ImageMagick compare score: {0}', phash_diff) + log.debug('ImageMagick compare score: {0}', phash_diff) return phash_diff <= compare_threshold return True @@ -189,18 +191,18 @@ def extract(log, outpath, item): art = get_art(log, item) outpath = bytestring_path(outpath) if not art: - log.info(u'No album art present in {0}, 
skipping.', item) + log.info('No album art present in {0}, skipping.', item) return # Add an extension to the filename. ext = mediafile.image_extension(art) if not ext: - log.warning(u'Unknown image type in {0}.', + log.warning('Unknown image type in {0}.', displayable_path(item.path)) return outpath += bytestring_path('.' + ext) - log.info(u'Extracting album art from: {0} to: {1}', + log.info('Extracting album art from: {0} to: {1}', item, displayable_path(outpath)) with open(syspath(outpath), 'wb') as f: f.write(art) @@ -216,7 +218,7 @@ def extract_first(log, outpath, items): def clear(log, lib, query): items = lib.items(query) - log.info(u'Clearing album art from {0} items', len(items)) + log.info('Clearing album art from {0} items', len(items)) for item in items: - log.debug(u'Clearing art for {0}', item) + log.debug('Clearing art for {0}', item) item.try_write(tags={'images': None}) diff --git a/libs/common/beets/autotag/__init__.py b/libs/common/beets/autotag/__init__.py index c4ee1300..e62f492c 100644 --- a/libs/common/beets/autotag/__init__.py +++ b/libs/common/beets/autotag/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,19 +15,59 @@ """Facilities for automatically determining files' correct metadata. """ -from __future__ import division, absolute_import, print_function from beets import logging from beets import config # Parts of external interface. -from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa +from .hooks import ( # noqa + AlbumInfo, + TrackInfo, + AlbumMatch, + TrackMatch, + Distance, +) from .match import tag_item, tag_album, Proposal # noqa from .match import Recommendation # noqa # Global logger. log = logging.getLogger('beets') +# Metadata fields that are already hardcoded, or where the tag name changes. 
+SPECIAL_FIELDS = { + 'album': ( + 'va', + 'releasegroup_id', + 'artist_id', + 'album_id', + 'mediums', + 'tracks', + 'year', + 'month', + 'day', + 'artist', + 'artist_credit', + 'artist_sort', + 'data_url' + ), + 'track': ( + 'track_alt', + 'artist_id', + 'release_track_id', + 'medium', + 'index', + 'medium_index', + 'title', + 'artist_credit', + 'artist_sort', + 'artist', + 'track_id', + 'medium_total', + 'data_url', + 'length' + ) +} + # Additional utilities for the main interface. @@ -43,17 +82,14 @@ def apply_item_metadata(item, track_info): item.mb_releasetrackid = track_info.release_track_id if track_info.artist_id: item.mb_artistid = track_info.artist_id - if track_info.data_source: - item.data_source = track_info.data_source - if track_info.lyricist is not None: - item.lyricist = track_info.lyricist - if track_info.composer is not None: - item.composer = track_info.composer - if track_info.composer_sort is not None: - item.composer_sort = track_info.composer_sort - if track_info.arranger is not None: - item.arranger = track_info.arranger + for field, value in track_info.items(): + # We only overwrite fields that are not already hardcoded. + if field in SPECIAL_FIELDS['track']: + continue + if value is None: + continue + item[field] = value # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? @@ -142,33 +178,24 @@ def apply_metadata(album_info, mapping): # Compilation flag. item.comp = album_info.va - # Miscellaneous metadata. 
- for field in ('albumtype', - 'label', - 'asin', - 'catalognum', - 'script', - 'language', - 'country', - 'albumstatus', - 'albumdisambig', - 'data_source',): - value = getattr(album_info, field) - if value is not None: - item[field] = value - if track_info.disctitle is not None: - item.disctitle = track_info.disctitle - - if track_info.media is not None: - item.media = track_info.media - - if track_info.lyricist is not None: - item.lyricist = track_info.lyricist - if track_info.composer is not None: - item.composer = track_info.composer - if track_info.composer_sort is not None: - item.composer_sort = track_info.composer_sort - if track_info.arranger is not None: - item.arranger = track_info.arranger - + # Track alt. item.track_alt = track_info.track_alt + + # Don't overwrite fields with empty values unless the + # field is explicitly allowed to be overwritten + for field, value in album_info.items(): + if field in SPECIAL_FIELDS['album']: + continue + clobber = field in config['overwrite_null']['album'].as_str_seq() + if value is None and not clobber: + continue + item[field] = value + + for field, value in track_info.items(): + if field in SPECIAL_FIELDS['track']: + continue + clobber = field in config['overwrite_null']['track'].as_str_seq() + value = getattr(track_info, field) + if value is None and not clobber: + continue + item[field] = value diff --git a/libs/common/beets/autotag/hooks.py b/libs/common/beets/autotag/hooks.py index 3615a933..9cd6f2cd 100644 --- a/libs/common/beets/autotag/hooks.py +++ b/libs/common/beets/autotag/hooks.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,7 +13,6 @@ # included in all copies or substantial portions of the Software. 
"""Glue between metadata sources and the matching logic.""" -from __future__ import division, absolute_import, print_function from collections import namedtuple from functools import total_ordering @@ -27,14 +25,36 @@ from beets.util import as_string from beets.autotag import mb from jellyfish import levenshtein_distance from unidecode import unidecode -import six log = logging.getLogger('beets') +# The name of the type for patterns in re changed in Python 3.7. +try: + Pattern = re._pattern_type +except AttributeError: + Pattern = re.Pattern + # Classes used to represent candidate options. +class AttrDict(dict): + """A dictionary that supports attribute ("dot") access, so `d.field` + is equivalent to `d['field']`. + """ -class AlbumInfo(object): + def __getattr__(self, attr): + if attr in self: + return self.get(attr) + else: + raise AttributeError + + def __setattr__(self, key, value): + self.__setitem__(key, value) + + def __hash__(self): + return id(self) + + +class AlbumInfo(AttrDict): """Describes a canonical release that may be used to match a release in the library. 
Consists of these data members: @@ -43,38 +63,22 @@ class AlbumInfo(object): - ``artist``: name of the release's primary artist - ``artist_id`` - ``tracks``: list of TrackInfo objects making up the release - - ``asin``: Amazon ASIN - - ``albumtype``: string describing the kind of release - - ``va``: boolean: whether the release has "various artists" - - ``year``: release year - - ``month``: release month - - ``day``: release day - - ``label``: music label responsible for the release - - ``mediums``: the number of discs in this release - - ``artist_sort``: name of the release's artist for sorting - - ``releasegroup_id``: MBID for the album's release group - - ``catalognum``: the label's catalog number for the release - - ``script``: character set used for metadata - - ``language``: human language of the metadata - - ``country``: the release country - - ``albumstatus``: MusicBrainz release status (Official, etc.) - - ``media``: delivery mechanism (Vinyl, etc.) - - ``albumdisambig``: MusicBrainz release disambiguation comment - - ``artist_credit``: Release-specific artist name - - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - - ``data_url``: The data source release URL. - The fields up through ``tracks`` are required. The others are - optional and may be None. + ``mediums`` along with the fields up through ``tracks`` are required. + The others are optional and may be None. 
""" - def __init__(self, album, album_id, artist, artist_id, tracks, asin=None, - albumtype=None, va=False, year=None, month=None, day=None, - label=None, mediums=None, artist_sort=None, - releasegroup_id=None, catalognum=None, script=None, - language=None, country=None, albumstatus=None, media=None, - albumdisambig=None, artist_credit=None, original_year=None, - original_month=None, original_day=None, data_source=None, - data_url=None): + + def __init__(self, tracks, album=None, album_id=None, artist=None, + artist_id=None, asin=None, albumtype=None, va=False, + year=None, month=None, day=None, label=None, mediums=None, + artist_sort=None, releasegroup_id=None, catalognum=None, + script=None, language=None, country=None, style=None, + genre=None, albumstatus=None, media=None, albumdisambig=None, + releasegroupdisambig=None, artist_credit=None, + original_year=None, original_month=None, + original_day=None, data_source=None, data_url=None, + discogs_albumid=None, discogs_labelid=None, + discogs_artistid=None, **kwargs): self.album = album self.album_id = album_id self.artist = artist @@ -94,15 +98,22 @@ class AlbumInfo(object): self.script = script self.language = language self.country = country + self.style = style + self.genre = genre self.albumstatus = albumstatus self.media = media self.albumdisambig = albumdisambig + self.releasegroupdisambig = releasegroupdisambig self.artist_credit = artist_credit self.original_year = original_year self.original_month = original_month self.original_day = original_day self.data_source = data_source self.data_url = data_url + self.discogs_albumid = discogs_albumid + self.discogs_labelid = discogs_labelid + self.discogs_artistid = discogs_artistid + self.update(kwargs) # Work around a bug in python-musicbrainz-ngs that causes some # strings to be bytes rather than Unicode. @@ -112,54 +123,46 @@ class AlbumInfo(object): constituent `TrackInfo` objects, are decoded to Unicode. 
""" for fld in ['album', 'artist', 'albumtype', 'label', 'artist_sort', - 'catalognum', 'script', 'language', 'country', - 'albumstatus', 'albumdisambig', 'artist_credit', 'media']: + 'catalognum', 'script', 'language', 'country', 'style', + 'genre', 'albumstatus', 'albumdisambig', + 'releasegroupdisambig', 'artist_credit', + 'media', 'discogs_albumid', 'discogs_labelid', + 'discogs_artistid']: value = getattr(self, fld) if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) - if self.tracks: - for track in self.tracks: - track.decode(codec) + for track in self.tracks: + track.decode(codec) + + def copy(self): + dupe = AlbumInfo([]) + dupe.update(self) + dupe.tracks = [track.copy() for track in self.tracks] + return dupe -class TrackInfo(object): +class TrackInfo(AttrDict): """Describes a canonical track present on a release. Appears as part of an AlbumInfo's ``tracks`` list. Consists of these data members: - ``title``: name of the track - ``track_id``: MusicBrainz ID; UUID fragment only - - ``release_track_id``: MusicBrainz ID respective to a track on a - particular release; UUID fragment only - - ``artist``: individual track artist name - - ``artist_id`` - - ``length``: float: duration of the track in seconds - - ``index``: position on the entire release - - ``media``: delivery mechanism (Vinyl, etc.) - - ``medium``: the disc number this track appears on in the album - - ``medium_index``: the track's position on the disc - - ``medium_total``: the number of tracks on the item's disc - - ``artist_sort``: name of the track artist for sorting - - ``disctitle``: name of the individual medium (subtitle) - - ``artist_credit``: Recording-specific artist name - - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - - ``data_url``: The data source release URL. 
- - ``lyricist``: individual track lyricist name - - ``composer``: individual track composer name - - ``composer_sort``: individual track composer sort name - - ``arranger`: individual track arranger name - - ``track_alt``: alternative track number (tape, vinyl, etc.) Only ``title`` and ``track_id`` are required. The rest of the fields may be None. The indices ``index``, ``medium``, and ``medium_index`` are all 1-based. """ - def __init__(self, title, track_id, release_track_id=None, artist=None, - artist_id=None, length=None, index=None, medium=None, - medium_index=None, medium_total=None, artist_sort=None, - disctitle=None, artist_credit=None, data_source=None, - data_url=None, media=None, lyricist=None, composer=None, - composer_sort=None, arranger=None, track_alt=None): + + def __init__(self, title=None, track_id=None, release_track_id=None, + artist=None, artist_id=None, length=None, index=None, + medium=None, medium_index=None, medium_total=None, + artist_sort=None, disctitle=None, artist_credit=None, + data_source=None, data_url=None, media=None, lyricist=None, + composer=None, composer_sort=None, arranger=None, + track_alt=None, work=None, mb_workid=None, + work_disambig=None, bpm=None, initial_key=None, genre=None, + **kwargs): self.title = title self.track_id = track_id self.release_track_id = release_track_id @@ -181,6 +184,13 @@ class TrackInfo(object): self.composer_sort = composer_sort self.arranger = arranger self.track_alt = track_alt + self.work = work + self.mb_workid = mb_workid + self.work_disambig = work_disambig + self.bpm = bpm + self.initial_key = initial_key + self.genre = genre + self.update(kwargs) # As above, work around a bug in python-musicbrainz-ngs. def decode(self, codec='utf-8'): @@ -193,6 +203,11 @@ class TrackInfo(object): if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) + def copy(self): + dupe = TrackInfo() + dupe.update(self) + return dupe + # Candidate distance scoring. 
@@ -220,8 +235,8 @@ def _string_dist_basic(str1, str2): transliteration/lowering to ASCII characters. Normalized by string length. """ - assert isinstance(str1, six.text_type) - assert isinstance(str2, six.text_type) + assert isinstance(str1, str) + assert isinstance(str2, str) str1 = as_string(unidecode(str1)) str2 = as_string(unidecode(str2)) str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) @@ -249,9 +264,9 @@ def string_dist(str1, str2): # "something, the". for word in SD_END_WORDS: if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word) - 2]) + str1 = '{} {}'.format(word, str1[:-len(word) - 2]) if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word) - 2]) + str2 = '{} {}'.format(word, str2[:-len(word) - 2]) # Perform a couple of basic normalizing substitutions. for pat, repl in SD_REPLACE: @@ -289,11 +304,12 @@ def string_dist(str1, str2): return base_dist + penalty -class LazyClassProperty(object): +class LazyClassProperty: """A decorator implementing a read-only property that is *lazy* in the sense that the getter is only invoked once. Subsequent accesses through *any* instance use the cached result. """ + def __init__(self, getter): self.getter = getter self.computed = False @@ -306,17 +322,17 @@ class LazyClassProperty(object): @total_ordering -@six.python_2_unicode_compatible -class Distance(object): +class Distance: """Keeps track of multiple distance penalties. Provides a single weighted distance for all penalties as well as a weighted distance for each individual penalty. """ + def __init__(self): self._penalties = {} @LazyClassProperty - def _weights(cls): # noqa + def _weights(cls): # noqa: N805 """A dictionary from keys to floating-point weights. """ weights_view = config['match']['distance_weights'] @@ -394,7 +410,7 @@ class Distance(object): return other - self.distance def __str__(self): - return "{0:.2f}".format(self.distance) + return f"{self.distance:.2f}" # Behave like a dict. 
@@ -421,7 +437,7 @@ class Distance(object): """ if not isinstance(dist, Distance): raise ValueError( - u'`dist` must be a Distance object, not {0}'.format(type(dist)) + '`dist` must be a Distance object, not {}'.format(type(dist)) ) for key, penalties in dist._penalties.items(): self._penalties.setdefault(key, []).extend(penalties) @@ -433,7 +449,7 @@ class Distance(object): be a compiled regular expression, in which case it will be matched against `value2`. """ - if isinstance(value1, re._pattern_type): + if isinstance(value1, Pattern): return bool(value1.match(value2)) return value1 == value2 @@ -445,7 +461,7 @@ class Distance(object): """ if not 0.0 <= dist <= 1.0: raise ValueError( - u'`dist` must be between 0.0 and 1.0, not {0}'.format(dist) + f'`dist` must be between 0.0 and 1.0, not {dist}' ) self._penalties.setdefault(key, []).append(dist) @@ -541,7 +557,7 @@ def album_for_mbid(release_id): try: album = mb.album_for_id(release_id) if album: - plugins.send(u'albuminfo_received', info=album) + plugins.send('albuminfo_received', info=album) return album except mb.MusicBrainzAPIError as exc: exc.log(log) @@ -554,7 +570,7 @@ def track_for_mbid(recording_id): try: track = mb.track_for_id(recording_id) if track: - plugins.send(u'trackinfo_received', info=track) + plugins.send('trackinfo_received', info=track) return track except mb.MusicBrainzAPIError as exc: exc.log(log) @@ -567,7 +583,7 @@ def albums_for_id(album_id): yield a for a in plugins.album_for_id(album_id): if a: - plugins.send(u'albuminfo_received', info=a) + plugins.send('albuminfo_received', info=a) yield a @@ -578,40 +594,43 @@ def tracks_for_id(track_id): yield t for t in plugins.track_for_id(track_id): if t: - plugins.send(u'trackinfo_received', info=t) + plugins.send('trackinfo_received', info=t) yield t -@plugins.notify_info_yielded(u'albuminfo_received') -def album_candidates(items, artist, album, va_likely): +@plugins.notify_info_yielded('albuminfo_received') +def album_candidates(items, 
artist, album, va_likely, extra_tags): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective names (strings), which may be derived from the item list or may be entered by the user. ``va_likely`` is a boolean indicating whether - the album is likely to be a "various artists" release. + the album is likely to be a "various artists" release. ``extra_tags`` + is an optional dictionary of additional tags used to further + constrain the search. """ + # Base candidates if we have album and artist to match. if artist and album: try: - for candidate in mb.match_album(artist, album, len(items)): - yield candidate + yield from mb.match_album(artist, album, len(items), + extra_tags) except mb.MusicBrainzAPIError as exc: exc.log(log) # Also add VA matches from MusicBrainz where appropriate. if va_likely and album: try: - for candidate in mb.match_album(None, album, len(items)): - yield candidate + yield from mb.match_album(None, album, len(items), + extra_tags) except mb.MusicBrainzAPIError as exc: exc.log(log) # Candidates from plugins. - for candidate in plugins.candidates(items, artist, album, va_likely): - yield candidate + yield from plugins.candidates(items, artist, album, va_likely, + extra_tags) -@plugins.notify_info_yielded(u'trackinfo_received') +@plugins.notify_info_yielded('trackinfo_received') def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or @@ -621,11 +640,9 @@ def item_candidates(item, artist, title): # MusicBrainz candidates. if artist and title: try: - for candidate in mb.match_track(artist, title): - yield candidate + yield from mb.match_track(artist, title) except mb.MusicBrainzAPIError as exc: exc.log(log) # Plugin candidates. 
- for candidate in plugins.item_candidates(item, artist, title): - yield candidate + yield from plugins.item_candidates(item, artist, title) diff --git a/libs/common/beets/autotag/match.py b/libs/common/beets/autotag/match.py index 71b62adb..d352a013 100644 --- a/libs/common/beets/autotag/match.py +++ b/libs/common/beets/autotag/match.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ releases and tracks. """ -from __future__ import division, absolute_import, print_function import datetime import re @@ -35,7 +33,7 @@ from beets.util.enumeration import OrderedEnum # album level to determine whether a given release is likely a VA # release and also on the track level to to remove the penalty for # differing artists. -VA_ARTISTS = (u'', u'various artists', u'various', u'va', u'unknown') +VA_ARTISTS = ('', 'various artists', 'various', 'va', 'unknown') # Global logger. log = logging.getLogger('beets') @@ -108,7 +106,7 @@ def assign_items(items, tracks): log.debug('...done.') # Produce the output matching. - mapping = dict((items[i], tracks[j]) for (i, j) in matching) + mapping = {items[i]: tracks[j] for (i, j) in matching} extra_items = list(set(items) - set(mapping.keys())) extra_items.sort(key=lambda i: (i.disc, i.track, i.title)) extra_tracks = list(set(tracks) - set(mapping.values())) @@ -276,16 +274,16 @@ def match_by_id(items): try: first = next(albumids) except StopIteration: - log.debug(u'No album ID found.') + log.debug('No album ID found.') return None # Is there a consensus on the MB album ID? for other in albumids: if other != first: - log.debug(u'No album ID consensus.') + log.debug('No album ID consensus.') return None # If all album IDs are equal, look up the album. 
- log.debug(u'Searching for discovered album ID: {0}', first) + log.debug('Searching for discovered album ID: {0}', first) return hooks.album_for_mbid(first) @@ -351,23 +349,23 @@ def _add_candidate(items, results, info): checking the track count, ordering the items, checking for duplicates, and calculating the distance. """ - log.debug(u'Candidate: {0} - {1} ({2})', + log.debug('Candidate: {0} - {1} ({2})', info.artist, info.album, info.album_id) # Discard albums with zero tracks. if not info.tracks: - log.debug(u'No tracks.') + log.debug('No tracks.') return # Don't duplicate. if info.album_id in results: - log.debug(u'Duplicate.') + log.debug('Duplicate.') return # Discard matches without required tags. for req_tag in config['match']['required'].as_str_seq(): if getattr(info, req_tag) is None: - log.debug(u'Ignored. Missing required tag: {0}', req_tag) + log.debug('Ignored. Missing required tag: {0}', req_tag) return # Find mapping between the items and the track info. @@ -380,10 +378,10 @@ def _add_candidate(items, results, info): penalties = [key for key, _ in dist] for penalty in config['match']['ignored'].as_str_seq(): if penalty in penalties: - log.debug(u'Ignored. Penalty: {0}', penalty) + log.debug('Ignored. Penalty: {0}', penalty) return - log.debug(u'Success. Distance: {0}', dist) + log.debug('Success. Distance: {0}', dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) @@ -411,7 +409,7 @@ def tag_album(items, search_artist=None, search_album=None, likelies, consensus = current_metadata(items) cur_artist = likelies['artist'] cur_album = likelies['album'] - log.debug(u'Tagging {0} - {1}', cur_artist, cur_album) + log.debug('Tagging {0} - {1}', cur_artist, cur_album) # The output result (distance, AlbumInfo) tuples (keyed by MB album # ID). @@ -420,7 +418,7 @@ def tag_album(items, search_artist=None, search_album=None, # Search by explicit ID. 
if search_ids: for search_id in search_ids: - log.debug(u'Searching for album ID: {0}', search_id) + log.debug('Searching for album ID: {0}', search_id) for id_candidate in hooks.albums_for_id(search_id): _add_candidate(items, candidates, id_candidate) @@ -431,13 +429,13 @@ def tag_album(items, search_artist=None, search_album=None, if id_info: _add_candidate(items, candidates, id_info) rec = _recommendation(list(candidates.values())) - log.debug(u'Album ID match recommendation is {0}', rec) + log.debug('Album ID match recommendation is {0}', rec) if candidates and not config['import']['timid']: # If we have a very good MBID match, return immediately. # Otherwise, this match will compete against metadata-based # matches. if rec == Recommendation.strong: - log.debug(u'ID match.') + log.debug('ID match.') return cur_artist, cur_album, \ Proposal(list(candidates.values()), rec) @@ -445,22 +443,29 @@ def tag_album(items, search_artist=None, search_album=None, if not (search_artist and search_album): # No explicit search terms -- use current metadata. search_artist, search_album = cur_artist, cur_album - log.debug(u'Search terms: {0} - {1}', search_artist, search_album) + log.debug('Search terms: {0} - {1}', search_artist, search_album) + + extra_tags = None + if config['musicbrainz']['extra_tags']: + tag_list = config['musicbrainz']['extra_tags'].get() + extra_tags = {k: v for (k, v) in likelies.items() if k in tag_list} + log.debug('Additional search terms: {0}', extra_tags) # Is this album likely to be a "various artist" release? va_likely = ((not consensus['artist']) or (search_artist.lower() in VA_ARTISTS) or any(item.comp for item in items)) - log.debug(u'Album might be VA: {0}', va_likely) + log.debug('Album might be VA: {0}', va_likely) # Get the results from the data sources. 
for matched_candidate in hooks.album_candidates(items, search_artist, search_album, - va_likely): + va_likely, + extra_tags): _add_candidate(items, candidates, matched_candidate) - log.debug(u'Evaluating {0} candidates.', len(candidates)) + log.debug('Evaluating {0} candidates.', len(candidates)) # Sort and get the recommendation. candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) @@ -485,7 +490,7 @@ def tag_item(item, search_artist=None, search_title=None, trackids = search_ids or [t for t in [item.mb_trackid] if t] if trackids: for trackid in trackids: - log.debug(u'Searching for track ID: {0}', trackid) + log.debug('Searching for track ID: {0}', trackid) for track_info in hooks.tracks_for_id(trackid): dist = track_distance(item, track_info, incl_artist=True) candidates[track_info.track_id] = \ @@ -494,7 +499,7 @@ def tag_item(item, search_artist=None, search_title=None, rec = _recommendation(_sort_candidates(candidates.values())) if rec == Recommendation.strong and \ not config['import']['timid']: - log.debug(u'Track ID match.') + log.debug('Track ID match.') return Proposal(_sort_candidates(candidates.values()), rec) # If we're searching by ID, don't proceed. @@ -507,7 +512,7 @@ def tag_item(item, search_artist=None, search_title=None, # Search terms. if not (search_artist and search_title): search_artist, search_title = item.artist, item.title - log.debug(u'Item search terms: {0} - {1}', search_artist, search_title) + log.debug('Item search terms: {0} - {1}', search_artist, search_title) # Get and evaluate candidate metadata. for track_info in hooks.item_candidates(item, search_artist, search_title): @@ -515,7 +520,7 @@ def tag_item(item, search_artist=None, search_title=None, candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) # Sort by distance and return with recommendation. 
- log.debug(u'Found {0} candidates.', len(candidates)) + log.debug('Found {0} candidates.', len(candidates)) candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) return Proposal(candidates, rec) diff --git a/libs/common/beets/autotag/mb.py b/libs/common/beets/autotag/mb.py index 2b28a5cc..e6a2e277 100644 --- a/libs/common/beets/autotag/mb.py +++ b/libs/common/beets/autotag/mb.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,57 +14,72 @@ """Searches for albums in the MusicBrainz database. """ -from __future__ import division, absolute_import, print_function import musicbrainzngs import re import traceback -from six.moves.urllib.parse import urljoin from beets import logging +from beets import plugins import beets.autotag.hooks import beets from beets import util from beets import config -import six +from collections import Counter +from urllib.parse import urljoin VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -if util.SNI_SUPPORTED: - BASE_URL = 'https://musicbrainz.org/' -else: - BASE_URL = 'http://musicbrainz.org/' +BASE_URL = 'https://musicbrainz.org/' SKIPPED_TRACKS = ['[data track]'] +FIELDS_TO_MB_KEYS = { + 'catalognum': 'catno', + 'country': 'country', + 'label': 'label', + 'media': 'format', + 'year': 'date', +} + musicbrainzngs.set_useragent('beets', beets.__version__, - 'http://beets.io/') + 'https://beets.io/') class MusicBrainzAPIError(util.HumanReadableException): """An error while talking to MusicBrainz. The `query` field is the parameter to the action and may have any type. 
""" + def __init__(self, reason, verb, query, tb=None): self.query = query if isinstance(reason, musicbrainzngs.WebServiceError): - reason = u'MusicBrainz not reachable' - super(MusicBrainzAPIError, self).__init__(reason, verb, tb) + reason = 'MusicBrainz not reachable' + super().__init__(reason, verb, tb) def get_message(self): - return u'{0} in {1} with query {2}'.format( + return '{} in {} with query {}'.format( self._reasonstr(), self.verb, repr(self.query) ) + log = logging.getLogger('beets') RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'labels', 'artist-credits', 'aliases', 'recording-level-rels', 'work-rels', - 'work-level-rels', 'artist-rels'] -TRACK_INCLUDES = ['artists', 'aliases'] + 'work-level-rels', 'artist-rels', 'isrcs'] +BROWSE_INCLUDES = ['artist-credits', 'work-rels', + 'artist-rels', 'recording-rels', 'release-rels'] +if "work-level-rels" in musicbrainzngs.VALID_BROWSE_INCLUDES['recording']: + BROWSE_INCLUDES.append("work-level-rels") +BROWSE_CHUNKSIZE = 100 +BROWSE_MAXTRACKS = 500 +TRACK_INCLUDES = ['artists', 'aliases', 'isrcs'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] +if 'genres' in musicbrainzngs.VALID_INCLUDES['recording']: + RELEASE_INCLUDES += ['genres'] def track_url(trackid): @@ -81,7 +95,11 @@ def configure(): from the beets configuration. This should be called at startup. """ hostname = config['musicbrainz']['host'].as_str() - musicbrainzngs.set_hostname(hostname) + https = config['musicbrainz']['https'].get(bool) + # Only call set_hostname when a custom server is configured. 
Since + # musicbrainz-ngs connects to musicbrainz.org with HTTPS by default + if hostname != "musicbrainz.org": + musicbrainzngs.set_hostname(hostname, https) musicbrainzngs.set_rate_limit( config['musicbrainz']['ratelimit_interval'].as_number(), config['musicbrainz']['ratelimit'].get(int), @@ -138,7 +156,7 @@ def _flatten_artist_credit(credit): artist_sort_parts = [] artist_credit_parts = [] for el in credit: - if isinstance(el, six.string_types): + if isinstance(el, str): # Join phrase. artist_parts.append(el) artist_credit_parts.append(el) @@ -185,13 +203,13 @@ def track_info(recording, index=None, medium=None, medium_index=None, the number of tracks on the medium. Each number is a 1-based index. """ info = beets.autotag.hooks.TrackInfo( - recording['title'], - recording['id'], + title=recording['title'], + track_id=recording['id'], index=index, medium=medium, medium_index=medium_index, medium_total=medium_total, - data_source=u'MusicBrainz', + data_source='MusicBrainz', data_url=track_url(recording['id']), ) @@ -207,12 +225,22 @@ def track_info(recording, index=None, medium=None, medium_index=None, if recording.get('length'): info.length = int(recording['length']) / (1000.0) + info.trackdisambig = recording.get('disambiguation') + + if recording.get('isrc-list'): + info.isrc = ';'.join(recording['isrc-list']) + lyricist = [] composer = [] composer_sort = [] for work_relation in recording.get('work-relation-list', ()): if work_relation['type'] != 'performance': continue + info.work = work_relation['work']['title'] + info.mb_workid = work_relation['work']['id'] + if 'disambiguation' in work_relation['work']: + info.work_disambig = work_relation['work']['disambiguation'] + for artist_relation in work_relation['work'].get( 'artist-relation-list', ()): if 'type' in artist_relation: @@ -224,10 +252,10 @@ def track_info(recording, index=None, medium=None, medium_index=None, composer_sort.append( artist_relation['artist']['sort-name']) if lyricist: - info.lyricist = 
u', '.join(lyricist) + info.lyricist = ', '.join(lyricist) if composer: - info.composer = u', '.join(composer) - info.composer_sort = u', '.join(composer_sort) + info.composer = ', '.join(composer) + info.composer_sort = ', '.join(composer_sort) arranger = [] for artist_relation in recording.get('artist-relation-list', ()): @@ -236,7 +264,12 @@ def track_info(recording, index=None, medium=None, medium_index=None, if type == 'arranger': arranger.append(artist_relation['artist']['name']) if arranger: - info.arranger = u', '.join(arranger) + info.arranger = ', '.join(arranger) + + # Supplementary fields provided by plugins + extra_trackdatas = plugins.send('mb_track_extract', data=recording) + for extra_trackdata in extra_trackdatas: + info.update(extra_trackdata) info.decode() return info @@ -270,6 +303,26 @@ def album_info(release): artist_name, artist_sort_name, artist_credit_name = \ _flatten_artist_credit(release['artist-credit']) + ntracks = sum(len(m['track-list']) for m in release['medium-list']) + + # The MusicBrainz API omits 'artist-relation-list' and 'work-relation-list' + # when the release has more than 500 tracks. So we use browse_recordings + # on chunks of tracks to recover the same information in this case. + if ntracks > BROWSE_MAXTRACKS: + log.debug('Album {} has too many tracks', release['id']) + recording_list = [] + for i in range(0, ntracks, BROWSE_CHUNKSIZE): + log.debug('Retrieving tracks starting at {}', i) + recording_list.extend(musicbrainzngs.browse_recordings( + release=release['id'], limit=BROWSE_CHUNKSIZE, + includes=BROWSE_INCLUDES, + offset=i)['recording-list']) + track_map = {r['id']: r for r in recording_list} + for medium in release['medium-list']: + for recording in medium['track-list']: + recording_info = track_map[recording['recording']['id']] + recording['recording'] = recording_info + # Basic info. 
track_infos = [] index = 0 @@ -281,7 +334,8 @@ def album_info(release): continue all_tracks = medium['track-list'] - if 'data-track-list' in medium: + if ('data-track-list' in medium + and not config['match']['ignore_data_tracks']): all_tracks += medium['data-track-list'] track_count = len(all_tracks) @@ -327,15 +381,15 @@ def album_info(release): track_infos.append(ti) info = beets.autotag.hooks.AlbumInfo( - release['title'], - release['id'], - artist_name, - release['artist-credit'][0]['artist']['id'], - track_infos, + album=release['title'], + album_id=release['id'], + artist=artist_name, + artist_id=release['artist-credit'][0]['artist']['id'], + tracks=track_infos, mediums=len(release['medium-list']), artist_sort=artist_sort_name, artist_credit=artist_credit_name, - data_source=u'MusicBrainz', + data_source='MusicBrainz', data_url=album_url(release['id']), ) info.va = info.artist_id == VARIOUS_ARTISTS_ID @@ -345,13 +399,12 @@ def album_info(release): info.releasegroup_id = release['release-group']['id'] info.albumstatus = release.get('status') - # Build up the disambiguation string from the release group and release. - disambig = [] + # Get the disambiguation strings at the release and release group level. if release['release-group'].get('disambiguation'): - disambig.append(release['release-group'].get('disambiguation')) + info.releasegroupdisambig = \ + release['release-group'].get('disambiguation') if release.get('disambiguation'): - disambig.append(release.get('disambiguation')) - info.albumdisambig = u', '.join(disambig) + info.albumdisambig = release.get('disambiguation') # Get the "classic" Release type. This data comes from a legacy API # feature before MusicBrainz supported multiple release types. @@ -360,18 +413,17 @@ def album_info(release): if reltype: info.albumtype = reltype.lower() - # Log the new-style "primary" and "secondary" release types. 
- # Eventually, we'd like to actually store this data, but we just log - # it for now to help understand the differences. + # Set the new-style "primary" and "secondary" release types. + albumtypes = [] if 'primary-type' in release['release-group']: rel_primarytype = release['release-group']['primary-type'] if rel_primarytype: - log.debug('primary MB release type: ' + rel_primarytype.lower()) + albumtypes.append(rel_primarytype.lower()) if 'secondary-type-list' in release['release-group']: if release['release-group']['secondary-type-list']: - log.debug('secondary MB release type(s): ' + ', '.join( - [secondarytype.lower() for secondarytype in - release['release-group']['secondary-type-list']])) + for sec_type in release['release-group']['secondary-type-list']: + albumtypes.append(sec_type.lower()) + info.albumtypes = '; '.join(albumtypes) # Release events. info.country, release_date = _preferred_release_event(release) @@ -402,17 +454,33 @@ def album_info(release): first_medium = release['medium-list'][0] info.media = first_medium.get('format') + if config['musicbrainz']['genres']: + sources = [ + release['release-group'].get('genre-list', []), + release.get('genre-list', []), + ] + genres = Counter() + for source in sources: + for genreitem in source: + genres[genreitem['name']] += int(genreitem['count']) + info.genre = '; '.join(g[0] for g in sorted(genres.items(), + key=lambda g: -g[1])) + + extra_albumdatas = plugins.send('mb_album_extract', data=release) + for extra_albumdata in extra_albumdatas: + info.update(extra_albumdata) + info.decode() return info -def match_album(artist, album, tracks=None): +def match_album(artist, album, tracks=None, extra_tags=None): """Searches for a single album ("release" in MusicBrainz parlance) and returns an iterator over AlbumInfo objects. May raise a MusicBrainzAPIError. The query consists of an artist name, an album name, and, - optionally, a number of tracks on the album. 
+ optionally, a number of tracks on the album and any other extra tags. """ # Build search criteria. criteria = {'release': album.lower().strip()} @@ -422,14 +490,24 @@ def match_album(artist, album, tracks=None): # Various Artists search. criteria['arid'] = VARIOUS_ARTISTS_ID if tracks is not None: - criteria['tracks'] = six.text_type(tracks) + criteria['tracks'] = str(tracks) + + # Additional search cues from existing metadata. + if extra_tags: + for tag in extra_tags: + key = FIELDS_TO_MB_KEYS[tag] + value = str(extra_tags.get(tag, '')).lower().strip() + if key == 'catno': + value = value.replace(' ', '') + if value: + criteria[key] = value # Abort if we have no search terms. if not any(criteria.values()): return try: - log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria) + log.debug('Searching for MusicBrainz releases with: {!r}', criteria) res = musicbrainzngs.search_releases( limit=config['musicbrainz']['searchlimit'].get(int), **criteria) except musicbrainzngs.MusicBrainzError as exc: @@ -470,7 +548,7 @@ def _parse_id(s): no ID can be found, return None. """ # Find the first thing that looks like a UUID/MBID. - match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) + match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) if match: return match.group() @@ -480,19 +558,19 @@ def album_for_id(releaseid): object or None if the album is not found. May raise a MusicBrainzAPIError. 
""" - log.debug(u'Requesting MusicBrainz release {}', releaseid) + log.debug('Requesting MusicBrainz release {}', releaseid) albumid = _parse_id(releaseid) if not albumid: - log.debug(u'Invalid MBID ({0}).', releaseid) + log.debug('Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) except musicbrainzngs.ResponseError: - log.debug(u'Album ID match failed.') + log.debug('Album ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, u'get release by ID', albumid, + raise MusicBrainzAPIError(exc, 'get release by ID', albumid, traceback.format_exc()) return album_info(res['release']) @@ -503,14 +581,14 @@ def track_for_id(releaseid): """ trackid = _parse_id(releaseid) if not trackid: - log.debug(u'Invalid MBID ({0}).', releaseid) + log.debug('Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) except musicbrainzngs.ResponseError: - log.debug(u'Track ID match failed.') + log.debug('Track ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, u'get recording by ID', trackid, + raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, traceback.format_exc()) return track_info(res['recording']) diff --git a/libs/common/beets/config_default.yaml b/libs/common/beets/config_default.yaml index 273f9423..74540891 100644 --- a/libs/common/beets/config_default.yaml +++ b/libs/common/beets/config_default.yaml @@ -7,6 +7,7 @@ import: move: no link: no hardlink: no + reflink: no delete: no resume: ask incremental: no @@ -44,10 +45,20 @@ replace: '^\s+': '' '^-': _ path_sep_replace: _ +drive_sep_replace: _ asciify_paths: false art_filename: cover max_filename_length: 0 +aunique: + keys: albumartist album + disambiguators: albumtype year label catalognum albumdisambig releasegroupdisambig + bracket: '[]' + +overwrite_null: + album: [] + track: [] + 
plugins: [] pluginpath: [] threaded: yes @@ -91,9 +102,12 @@ statefile: state.pickle musicbrainz: host: musicbrainz.org + https: no ratelimit: 1 ratelimit_interval: 1.0 searchlimit: 5 + extra_tags: [] + genres: no match: strong_rec_thresh: 0.04 @@ -129,6 +143,7 @@ match: ignored: [] required: [] ignored_media: [] + ignore_data_tracks: yes ignore_video_tracks: yes track_length_grace: 10 track_length_max: 30 diff --git a/libs/common/beets/dbcore/__init__.py b/libs/common/beets/dbcore/__init__.py index 689e7202..923c34ca 100644 --- a/libs/common/beets/dbcore/__init__.py +++ b/libs/common/beets/dbcore/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """DBCore is an abstract database package that forms the basis for beets' Library. """ -from __future__ import division, absolute_import, print_function from .db import Model, Database from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery diff --git a/libs/common/beets/dbcore/db.py b/libs/common/beets/dbcore/db.py index 0f4dc151..acd131be 100644 --- a/libs/common/beets/dbcore/db.py +++ b/libs/common/beets/dbcore/db.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,22 +14,21 @@ """The central Model and Database constructs for DBCore. 
""" -from __future__ import division, absolute_import, print_function import time import os +import re from collections import defaultdict import threading import sqlite3 import contextlib -import collections import beets -from beets.util.functemplate import Template +from beets.util import functemplate from beets.util import py3_path from beets.dbcore import types from .query import MatchQuery, NullSort, TrueQuery -import six +from collections.abc import Mapping class DBAccessError(Exception): @@ -42,20 +40,30 @@ class DBAccessError(Exception): """ -class FormattedMapping(collections.Mapping): +class FormattedMapping(Mapping): """A `dict`-like formatted view of a model. The accessor `mapping[key]` returns the formatted version of `model[key]` as a unicode string. + The `included_keys` parameter allows filtering the fields that are + returned. By default all fields are returned. Limiting to specific keys can + avoid expensive per-item database queries. + If `for_path` is true, all path separators in the formatted values are replaced. """ - def __init__(self, model, for_path=False): + ALL_KEYS = '*' + + def __init__(self, model, included_keys=ALL_KEYS, for_path=False): self.for_path = for_path self.model = model - self.model_keys = model.keys(True) + if included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. 
+ self.model_keys = self.model.keys(True) + else: + self.model_keys = included_keys def __getitem__(self, key): if key in self.model_keys: @@ -72,7 +80,7 @@ class FormattedMapping(collections.Mapping): def get(self, key, default=None): if default is None: default = self.model._type(key).format(None) - return super(FormattedMapping, self).get(key, default) + return super().get(key, default) def _get_formatted(self, model, key): value = model._type(key).format(model.get(key)) @@ -81,6 +89,11 @@ class FormattedMapping(collections.Mapping): if self.for_path: sep_repl = beets.config['path_sep_replace'].as_str() + sep_drive = beets.config['drive_sep_replace'].as_str() + + if re.match(r'^\w:', value): + value = re.sub(r'(?<=^\w):', sep_drive, value) + for sep in (os.path.sep, os.path.altsep): if sep: value = value.replace(sep, sep_repl) @@ -88,11 +101,105 @@ class FormattedMapping(collections.Mapping): return value +class LazyConvertDict: + """Lazily convert types for attributes fetched from the database + """ + + def __init__(self, model_cls): + """Initialize the object empty + """ + self.data = {} + self.model_cls = model_cls + self._converted = {} + + def init(self, data): + """Set the base data that should be lazily converted + """ + self.data = data + + def _convert(self, key, value): + """Convert the attribute type according the the SQL type + """ + return self.model_cls._type(key).from_sql(value) + + def __setitem__(self, key, value): + """Set an attribute value, assume it's already converted + """ + self._converted[key] = value + + def __getitem__(self, key): + """Get an attribute value, converting the type on demand + if needed + """ + if key in self._converted: + return self._converted[key] + elif key in self.data: + value = self._convert(key, self.data[key]) + self._converted[key] = value + return value + + def __delitem__(self, key): + """Delete both converted and base data + """ + if key in self._converted: + del self._converted[key] + if key in self.data: + 
del self.data[key] + + def keys(self): + """Get a list of available field names for this object. + """ + return list(self._converted.keys()) + list(self.data.keys()) + + def copy(self): + """Create a copy of the object. + """ + new = self.__class__(self.model_cls) + new.data = self.data.copy() + new._converted = self._converted.copy() + return new + + # Act like a dictionary. + + def update(self, values): + """Assign all values in the given dict. + """ + for key, value in values.items(): + self[key] = value + + def items(self): + """Iterate over (key, value) pairs that this object contains. + Computed fields are not included. + """ + for key in self: + yield key, self[key] + + def get(self, key, default=None): + """Get the value for a given key or `default` if it does not + exist. + """ + if key in self: + return self[key] + else: + return default + + def __contains__(self, key): + """Determine whether `key` is an attribute on this object. + """ + return key in self.keys() + + def __iter__(self): + """Iterate over the available field names (excluding computed + fields). + """ + return iter(self.keys()) + + # Abstract base for model classes. -class Model(object): +class Model: """An abstract object representing an object in the database. Model - objects act like dictionaries (i.e., the allow subscript access like + objects act like dictionaries (i.e., they allow subscript access like ``obj['field']``). The same field set is available via attribute access as a shortcut (i.e., ``obj.field``). Three kinds of attributes are available: @@ -143,12 +250,22 @@ class Model(object): are subclasses of `Sort`. """ + _queries = {} + """Named queries that use a field-like `name:value` syntax but which + do not relate to any specific field. + """ + _always_dirty = False """By default, fields only become "dirty" when their value actually changes. Enabling this flag marks fields as dirty even when the new value is the same as the old value (e.g., `o.f = o.f`). 
""" + _revision = -1 + """A revision number from when the model was loaded from or written + to the database. + """ + @classmethod def _getters(cls): """Return a mapping from field names to getter functions. @@ -172,8 +289,8 @@ class Model(object): """ self._db = db self._dirty = set() - self._values_fixed = {} - self._values_flex = {} + self._values_fixed = LazyConvertDict(self) + self._values_flex = LazyConvertDict(self) # Initial contents. self.update(values) @@ -187,23 +304,25 @@ class Model(object): ordinary construction are bypassed. """ obj = cls(db) - for key, value in fixed_values.items(): - obj._values_fixed[key] = cls._type(key).from_sql(value) - for key, value in flex_values.items(): - obj._values_flex[key] = cls._type(key).from_sql(value) + + obj._values_fixed.init(fixed_values) + obj._values_flex.init(flex_values) + return obj def __repr__(self): - return '{0}({1})'.format( + return '{}({})'.format( type(self).__name__, - ', '.join('{0}={1!r}'.format(k, v) for k, v in dict(self).items()), + ', '.join(f'{k}={v!r}' for k, v in dict(self).items()), ) def clear_dirty(self): """Mark all fields as *clean* (i.e., not needing to be stored to - the database). + the database). Also update the revision. """ self._dirty = set() + if self._db: + self._revision = self._db.revision def _check_db(self, need_id=True): """Ensure that this object is associated with a database row: it @@ -212,10 +331,10 @@ class Model(object): """ if not self._db: raise ValueError( - u'{0} has no database'.format(type(self).__name__) + '{} has no database'.format(type(self).__name__) ) if need_id and not self.id: - raise ValueError(u'{0} has no id'.format(type(self).__name__)) + raise ValueError('{} has no id'.format(type(self).__name__)) def copy(self): """Create a copy of the model object. @@ -243,19 +362,32 @@ class Model(object): """ return cls._fields.get(key) or cls._types.get(key) or types.DEFAULT - def __getitem__(self, key): - """Get the value for a field. 
Raise a KeyError if the field is - not available. + def _get(self, key, default=None, raise_=False): + """Get the value for a field, or `default`. Alternatively, + raise a KeyError if the field is not available. """ getters = self._getters() if key in getters: # Computed. return getters[key](self) elif key in self._fields: # Fixed. - return self._values_fixed.get(key, self._type(key).null) + if key in self._values_fixed: + return self._values_fixed[key] + else: + return self._type(key).null elif key in self._values_flex: # Flexible. return self._values_flex[key] - else: + elif raise_: raise KeyError(key) + else: + return default + + get = _get + + def __getitem__(self, key): + """Get the value for a field. Raise a KeyError if the field is + not available. + """ + return self._get(key, raise_=True) def _setitem(self, key, value): """Assign the value for a field, return whether new and old value @@ -290,12 +422,12 @@ class Model(object): if key in self._values_flex: # Flexible. del self._values_flex[key] self._dirty.add(key) # Mark for dropping on store. + elif key in self._fields: # Fixed + setattr(self, key, self._type(key).null) elif key in self._getters(): # Computed. - raise KeyError(u'computed field {0} cannot be deleted'.format(key)) - elif key in self._fields: # Fixed. - raise KeyError(u'fixed field {0} cannot be deleted'.format(key)) + raise KeyError(f'computed field {key} cannot be deleted') else: - raise KeyError(u'no such field {0}'.format(key)) + raise KeyError(f'no such field {key}') def keys(self, computed=False): """Get a list of available field names for this object. The @@ -330,19 +462,10 @@ class Model(object): for key in self: yield key, self[key] - def get(self, key, default=None): - """Get the value for a given key or `default` if it does not - exist. - """ - if key in self: - return self[key] - else: - return default - def __contains__(self, key): """Determine whether `key` is an attribute on this object. 
""" - return key in self.keys(True) + return key in self.keys(computed=True) def __iter__(self): """Iterate over the available field names (excluding computed @@ -354,22 +477,22 @@ class Model(object): def __getattr__(self, key): if key.startswith('_'): - raise AttributeError(u'model has no attribute {0!r}'.format(key)) + raise AttributeError(f'model has no attribute {key!r}') else: try: return self[key] except KeyError: - raise AttributeError(u'no such field {0!r}'.format(key)) + raise AttributeError(f'no such field {key!r}') def __setattr__(self, key, value): if key.startswith('_'): - super(Model, self).__setattr__(key, value) + super().__setattr__(key, value) else: self[key] = value def __delattr__(self, key): if key.startswith('_'): - super(Model, self).__delattr__(key) + super().__delattr__(key) else: del self[key] @@ -398,7 +521,7 @@ class Model(object): with self._db.transaction() as tx: # Main table update. if assignments: - query = 'UPDATE {0} SET {1} WHERE id=?'.format( + query = 'UPDATE {} SET {} WHERE id=?'.format( self._table, assignments ) subvars.append(self.id) @@ -409,7 +532,7 @@ class Model(object): if key in self._dirty: self._dirty.remove(key) tx.mutate( - 'INSERT INTO {0} ' + 'INSERT INTO {} ' '(entity_id, key, value) ' 'VALUES (?, ?, ?);'.format(self._flex_table), (self.id, key, value), @@ -418,7 +541,7 @@ class Model(object): # Deleted flexible attributes. for key in self._dirty: tx.mutate( - 'DELETE FROM {0} ' + 'DELETE FROM {} ' 'WHERE entity_id=? AND key=?'.format(self._flex_table), (self.id, key) ) @@ -427,12 +550,18 @@ class Model(object): def load(self): """Refresh the object's metadata from the library database. + + If check_revision is true, the database is only queried loaded when a + transaction has been committed since the item was last loaded. 
""" self._check_db() + if not self._dirty and self._db.revision == self._revision: + # Exit early + return stored_obj = self._db._get(type(self), self.id) - assert stored_obj is not None, u"object {0} not in DB".format(self.id) - self._values_fixed = {} - self._values_flex = {} + assert stored_obj is not None, f"object {self.id} not in DB" + self._values_fixed = LazyConvertDict(self) + self._values_flex = LazyConvertDict(self) self.update(dict(stored_obj)) self.clear_dirty() @@ -442,11 +571,11 @@ class Model(object): self._check_db() with self._db.transaction() as tx: tx.mutate( - 'DELETE FROM {0} WHERE id=?'.format(self._table), + f'DELETE FROM {self._table} WHERE id=?', (self.id,) ) tx.mutate( - 'DELETE FROM {0} WHERE entity_id=?'.format(self._flex_table), + f'DELETE FROM {self._flex_table} WHERE entity_id=?', (self.id,) ) @@ -464,7 +593,7 @@ class Model(object): with self._db.transaction() as tx: new_id = tx.mutate( - 'INSERT INTO {0} DEFAULT VALUES'.format(self._table) + f'INSERT INTO {self._table} DEFAULT VALUES' ) self.id = new_id self.added = time.time() @@ -479,11 +608,11 @@ class Model(object): _formatter = FormattedMapping - def formatted(self, for_path=False): + def formatted(self, included_keys=_formatter.ALL_KEYS, for_path=False): """Get a mapping containing all values on this object formatted as human-readable unicode strings. """ - return self._formatter(self, for_path) + return self._formatter(self, included_keys, for_path) def evaluate_template(self, template, for_path=False): """Evaluate a template (a string or a `Template` object) using @@ -491,9 +620,9 @@ class Model(object): separators will be added to the template. """ # Perform substitution. 
- if isinstance(template, six.string_types): - template = Template(template) - return template.substitute(self.formatted(for_path), + if isinstance(template, str): + template = functemplate.template(template) + return template.substitute(self.formatted(for_path=for_path), self._template_funcs()) # Parsing. @@ -502,8 +631,8 @@ class Model(object): def _parse(cls, key, string): """Parse a string as a value for the given key. """ - if not isinstance(string, six.string_types): - raise TypeError(u"_parse() argument must be a string") + if not isinstance(string, str): + raise TypeError("_parse() argument must be a string") return cls._type(key).parse(string) @@ -515,11 +644,13 @@ class Model(object): # Database controller and supporting interfaces. -class Results(object): +class Results: """An item query result set. Iterating over the collection lazily constructs LibModel objects that reflect database rows. """ - def __init__(self, model_class, rows, db, query=None, sort=None): + + def __init__(self, model_class, rows, db, flex_rows, + query=None, sort=None): """Create a result set that will construct objects of type `model_class`. @@ -539,6 +670,7 @@ class Results(object): self.db = db self.query = query self.sort = sort + self.flex_rows = flex_rows # We keep a queue of rows we haven't yet consumed for # materialization. We preserve the original total number of @@ -560,6 +692,10 @@ class Results(object): a `Results` object a second time should be much faster than the first. """ + + # Index flexible attributes by the item ID, so we have easier access + flex_attrs = self._get_indexed_flex_attrs() + index = 0 # Position in the materialized objects. while index < len(self._objects) or self._rows: # Are there previously-materialized objects to produce? 
@@ -572,7 +708,7 @@ class Results(object): else: while self._rows: row = self._rows.pop(0) - obj = self._make_model(row) + obj = self._make_model(row, flex_attrs.get(row['id'], {})) # If there is a slow-query predicate, ensurer that the # object passes it. if not self.query or self.query.match(obj): @@ -594,20 +730,24 @@ class Results(object): # Objects are pre-sorted (i.e., by the database). return self._get_objects() - def _make_model(self, row): - # Get the flexible attributes for the object. - with self.db.transaction() as tx: - flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id=?'.format( - self.model_class._flex_table - ), - (row['id'],) - ) + def _get_indexed_flex_attrs(self): + """ Index flexible attributes by the entity id they belong to + """ + flex_values = {} + for row in self.flex_rows: + if row['entity_id'] not in flex_values: + flex_values[row['entity_id']] = {} + flex_values[row['entity_id']][row['key']] = row['value'] + + return flex_values + + def _make_model(self, row, flex_values={}): + """ Create a Model object for the given row + """ cols = dict(row) - values = dict((k, v) for (k, v) in cols.items() - if not k[:4] == 'flex') - flex_values = dict((row['key'], row['value']) for row in flex_rows) + values = {k: v for (k, v) in cols.items() + if not k[:4] == 'flex'} # Construct the Python object obj = self.model_class._awaken(self.db, values, flex_values) @@ -656,7 +796,7 @@ class Results(object): next(it) return next(it) except StopIteration: - raise IndexError(u'result index {0} out of range'.format(n)) + raise IndexError(f'result index {n} out of range') def get(self): """Return the first matching object, or None if no objects @@ -669,10 +809,16 @@ class Results(object): return None -class Transaction(object): +class Transaction: """A context manager for safe, concurrent access to the database. All SQL commands should be executed through a transaction. 
""" + + _mutated = False + """A flag storing whether a mutation has been executed in the + current transaction. + """ + def __init__(self, db): self.db = db @@ -694,12 +840,15 @@ class Transaction(object): entered but not yet exited transaction. If it is the last active transaction, the database updates are committed. """ + # Beware of races; currently secured by db._db_lock + self.db.revision += self._mutated with self.db._tx_stack() as stack: assert stack.pop() is self empty = not stack if empty: # Ending a "root" transaction. End the SQLite transaction. self.db._connection().commit() + self._mutated = False self.db._db_lock.release() def query(self, statement, subvals=()): @@ -715,7 +864,6 @@ class Transaction(object): """ try: cursor = self.db._connection().execute(statement, subvals) - return cursor.lastrowid except sqlite3.OperationalError as e: # In two specific cases, SQLite reports an error while accessing # the underlying database file. We surface these exceptions as @@ -725,26 +873,41 @@ class Transaction(object): raise DBAccessError(e.args[0]) else: raise + else: + self._mutated = True + return cursor.lastrowid def script(self, statements): """Execute a string containing multiple SQL statements.""" + # We don't know whether this mutates, but quite likely it does. + self._mutated = True self.db._connection().executescript(statements) -class Database(object): +class Database: """A container for Model objects that wraps an SQLite database as the backend. """ + _models = () """The Model subclasses representing tables in this database. """ + supports_extensions = hasattr(sqlite3.Connection, 'enable_load_extension') + """Whether or not the current version of SQLite supports extensions""" + + revision = 0 + """The current revision of the database. To be increased whenever + data is written in a transaction. 
+ """ + def __init__(self, path, timeout=5.0): self.path = path self.timeout = timeout self._connections = {} self._tx_stacks = defaultdict(list) + self._extensions = [] # A lock to protect the _connections and _tx_stacks maps, which # both map thread IDs to private resources. @@ -794,6 +957,13 @@ class Database(object): py3_path(self.path), timeout=self.timeout ) + if self.supports_extensions: + conn.enable_load_extension(True) + + # Load any extension that are already loaded for other connections. + for path in self._extensions: + conn.load_extension(path) + # Access SELECT results like dictionaries. conn.row_factory = sqlite3.Row return conn @@ -822,6 +992,18 @@ class Database(object): """ return Transaction(self) + def load_extension(self, path): + """Load an SQLite extension into all open connections.""" + if not self.supports_extensions: + raise ValueError( + 'this sqlite3 installation does not support extensions') + + self._extensions.append(path) + + # Load the extension into every open connection. + for conn in self._connections.values(): + conn.load_extension(path) + # Schema setup and migration. def _make_table(self, table, fields): @@ -831,7 +1013,7 @@ class Database(object): # Get current schema. with self.transaction() as tx: rows = tx.query('PRAGMA table_info(%s)' % table) - current_fields = set([row[1] for row in rows]) + current_fields = {row[1] for row in rows} field_names = set(fields.keys()) if current_fields.issuperset(field_names): @@ -842,9 +1024,9 @@ class Database(object): # No table exists. columns = [] for name, typ in fields.items(): - columns.append('{0} {1}'.format(name, typ.sql)) - setup_sql = 'CREATE TABLE {0} ({1});\n'.format(table, - ', '.join(columns)) + columns.append(f'{name} {typ.sql}') + setup_sql = 'CREATE TABLE {} ({});\n'.format(table, + ', '.join(columns)) else: # Table exists does not match the field set. 
@@ -852,7 +1034,7 @@ class Database(object): for name, typ in fields.items(): if name in current_fields: continue - setup_sql += 'ALTER TABLE {0} ADD COLUMN {1} {2};\n'.format( + setup_sql += 'ALTER TABLE {} ADD COLUMN {} {};\n'.format( table, name, typ.sql ) @@ -888,17 +1070,31 @@ class Database(object): where, subvals = query.clause() order_by = sort.order_clause() - sql = ("SELECT * FROM {0} WHERE {1} {2}").format( + sql = ("SELECT * FROM {} WHERE {} {}").format( model_cls._table, where or '1', - "ORDER BY {0}".format(order_by) if order_by else '', + f"ORDER BY {order_by}" if order_by else '', + ) + + # Fetch flexible attributes for items matching the main query. + # Doing the per-item filtering in python is faster than issuing + # one query per item to sqlite. + flex_sql = (""" + SELECT * FROM {} WHERE entity_id IN + (SELECT id FROM {} WHERE {}); + """.format( + model_cls._flex_table, + model_cls._table, + where or '1', + ) ) with self.transaction() as tx: rows = tx.query(sql, subvals) + flex_rows = tx.query(flex_sql, subvals) return Results( - model_cls, rows, self, + model_cls, rows, self, flex_rows, None if where else query, # Slow query component. sort if sort.is_slow() else None, # Slow sort component. ) diff --git a/libs/common/beets/dbcore/query.py b/libs/common/beets/dbcore/query.py index 8fb64e20..96476a5b 100644 --- a/libs/common/beets/dbcore/query.py +++ b/libs/common/beets/dbcore/query.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,7 +14,6 @@ """The Query type hierarchy for DBCore. 
""" -from __future__ import division, absolute_import, print_function import re from operator import mul @@ -23,10 +21,6 @@ from beets import util from datetime import datetime, timedelta import unicodedata from functools import reduce -import six - -if not six.PY2: - buffer = memoryview # sqlite won't accept memoryview in python 2 class ParsingError(ValueError): @@ -44,8 +38,8 @@ class InvalidQueryError(ParsingError): def __init__(self, query, explanation): if isinstance(query, list): query = " ".join(query) - message = u"'{0}': {1}".format(query, explanation) - super(InvalidQueryError, self).__init__(message) + message = f"'{query}': {explanation}" + super().__init__(message) class InvalidQueryArgumentValueError(ParsingError): @@ -56,13 +50,13 @@ class InvalidQueryArgumentValueError(ParsingError): """ def __init__(self, what, expected, detail=None): - message = u"'{0}' is not {1}".format(what, expected) + message = f"'{what}' is not {expected}" if detail: - message = u"{0}: {1}".format(message, detail) - super(InvalidQueryArgumentValueError, self).__init__(message) + message = f"{message}: {detail}" + super().__init__(message) -class Query(object): +class Query: """An abstract class representing a query into the item database. 
""" @@ -82,7 +76,7 @@ class Query(object): raise NotImplementedError def __repr__(self): - return "{0.__class__.__name__}()".format(self) + return f"{self.__class__.__name__}()" def __eq__(self, other): return type(self) == type(other) @@ -129,7 +123,7 @@ class FieldQuery(Query): "{0.fast})".format(self)) def __eq__(self, other): - return super(FieldQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.field == other.field and self.pattern == other.pattern def __hash__(self): @@ -151,17 +145,13 @@ class NoneQuery(FieldQuery): """A query that checks whether a field is null.""" def __init__(self, field, fast=True): - super(NoneQuery, self).__init__(field, None, fast) + super().__init__(field, None, fast) def col_clause(self): return self.field + " IS NULL", () - @classmethod - def match(cls, item): - try: - return item[cls.field] is None - except KeyError: - return True + def match(self, item): + return item.get(self.field) is None def __repr__(self): return "{0.__class__.__name__}({0.field!r}, {0.fast})".format(self) @@ -214,14 +204,14 @@ class RegexpQuery(StringFieldQuery): """ def __init__(self, field, pattern, fast=True): - super(RegexpQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) pattern = self._normalize(pattern) try: self.pattern = re.compile(self.pattern) except re.error as exc: # Invalid regular expression. 
raise InvalidQueryArgumentValueError(pattern, - u"a regular expression", + "a regular expression", format(exc)) @staticmethod @@ -242,8 +232,8 @@ class BooleanQuery(MatchQuery): """ def __init__(self, field, pattern, fast=True): - super(BooleanQuery, self).__init__(field, pattern, fast) - if isinstance(pattern, six.string_types): + super().__init__(field, pattern, fast) + if isinstance(pattern, str): self.pattern = util.str2bool(pattern) self.pattern = int(self.pattern) @@ -256,16 +246,16 @@ class BytesQuery(MatchQuery): """ def __init__(self, field, pattern): - super(BytesQuery, self).__init__(field, pattern) + super().__init__(field, pattern) # Use a buffer/memoryview representation of the pattern for SQLite # matching. This instructs SQLite to treat the blob as binary # rather than encoded Unicode. - if isinstance(self.pattern, (six.text_type, bytes)): - if isinstance(self.pattern, six.text_type): + if isinstance(self.pattern, (str, bytes)): + if isinstance(self.pattern, str): self.pattern = self.pattern.encode('utf-8') - self.buf_pattern = buffer(self.pattern) - elif isinstance(self.pattern, buffer): + self.buf_pattern = memoryview(self.pattern) + elif isinstance(self.pattern, memoryview): self.buf_pattern = self.pattern self.pattern = bytes(self.pattern) @@ -297,10 +287,10 @@ class NumericQuery(FieldQuery): try: return float(s) except ValueError: - raise InvalidQueryArgumentValueError(s, u"an int or a float") + raise InvalidQueryArgumentValueError(s, "an int or a float") def __init__(self, field, pattern, fast=True): - super(NumericQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) parts = pattern.split('..', 1) if len(parts) == 1: @@ -318,7 +308,7 @@ class NumericQuery(FieldQuery): if self.field not in item: return False value = item[self.field] - if isinstance(value, six.string_types): + if isinstance(value, str): value = self._convert(value) if self.point is not None: @@ -335,14 +325,14 @@ class NumericQuery(FieldQuery): 
return self.field + '=?', (self.point,) else: if self.rangemin is not None and self.rangemax is not None: - return (u'{0} >= ? AND {0} <= ?'.format(self.field), + return ('{0} >= ? AND {0} <= ?'.format(self.field), (self.rangemin, self.rangemax)) elif self.rangemin is not None: - return u'{0} >= ?'.format(self.field), (self.rangemin,) + return f'{self.field} >= ?', (self.rangemin,) elif self.rangemax is not None: - return u'{0} <= ?'.format(self.field), (self.rangemax,) + return f'{self.field} <= ?', (self.rangemax,) else: - return u'1', () + return '1', () class CollectionQuery(Query): @@ -387,7 +377,7 @@ class CollectionQuery(Query): return "{0.__class__.__name__}({0.subqueries!r})".format(self) def __eq__(self, other): - return super(CollectionQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.subqueries == other.subqueries def __hash__(self): @@ -411,7 +401,7 @@ class AnyFieldQuery(CollectionQuery): subqueries = [] for field in self.fields: subqueries.append(cls(field, pattern, True)) - super(AnyFieldQuery, self).__init__(subqueries) + super().__init__(subqueries) def clause(self): return self.clause_with_joiner('or') @@ -427,7 +417,7 @@ class AnyFieldQuery(CollectionQuery): "{0.query_class.__name__})".format(self)) def __eq__(self, other): - return super(AnyFieldQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.query_class == other.query_class def __hash__(self): @@ -453,7 +443,7 @@ class AndQuery(MutableCollectionQuery): return self.clause_with_joiner('and') def match(self, item): - return all([q.match(item) for q in self.subqueries]) + return all(q.match(item) for q in self.subqueries) class OrQuery(MutableCollectionQuery): @@ -463,7 +453,7 @@ class OrQuery(MutableCollectionQuery): return self.clause_with_joiner('or') def match(self, item): - return any([q.match(item) for q in self.subqueries]) + return any(q.match(item) for q in self.subqueries) class NotQuery(Query): @@ -477,7 +467,7 @@ class 
NotQuery(Query): def clause(self): clause, subvals = self.subquery.clause() if clause: - return 'not ({0})'.format(clause), subvals + return f'not ({clause})', subvals else: # If there is no clause, there is nothing to negate. All the logic # is handled by match() for slow queries. @@ -490,7 +480,7 @@ class NotQuery(Query): return "{0.__class__.__name__}({0.subquery!r})".format(self) def __eq__(self, other): - return super(NotQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.subquery == other.subquery def __hash__(self): @@ -546,7 +536,7 @@ def _parse_periods(pattern): return (start, end) -class Period(object): +class Period: """A period of time given by a date, time and precision. Example: 2014-01-01 10:50:30 with precision 'month' represents all @@ -572,7 +562,7 @@ class Period(object): or "second"). """ if precision not in Period.precisions: - raise ValueError(u'Invalid precision {0}'.format(precision)) + raise ValueError(f'Invalid precision {precision}') self.date = date self.precision = precision @@ -653,10 +643,10 @@ class Period(object): elif 'second' == precision: return date + timedelta(seconds=1) else: - raise ValueError(u'unhandled precision {0}'.format(precision)) + raise ValueError(f'unhandled precision {precision}') -class DateInterval(object): +class DateInterval: """A closed-open interval of dates. A left endpoint of None means since the beginning of time. 
@@ -665,7 +655,7 @@ class DateInterval(object): def __init__(self, start, end): if start is not None and end is not None and not start < end: - raise ValueError(u"start date {0} is not before end date {1}" + raise ValueError("start date {} is not before end date {}" .format(start, end)) self.start = start self.end = end @@ -686,7 +676,7 @@ class DateInterval(object): return True def __str__(self): - return '[{0}, {1})'.format(self.start, self.end) + return f'[{self.start}, {self.end})' class DateQuery(FieldQuery): @@ -700,7 +690,7 @@ class DateQuery(FieldQuery): """ def __init__(self, field, pattern, fast=True): - super(DateQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) start, end = _parse_periods(pattern) self.interval = DateInterval.from_periods(start, end) @@ -759,12 +749,12 @@ class DurationQuery(NumericQuery): except ValueError: raise InvalidQueryArgumentValueError( s, - u"a M:SS string or a float") + "a M:SS string or a float") # Sorting. -class Sort(object): +class Sort: """An abstract class representing a sort operation for a query into the item database. 
""" @@ -851,13 +841,13 @@ class MultipleSort(Sort): return items def __repr__(self): - return 'MultipleSort({!r})'.format(self.sorts) + return f'MultipleSort({self.sorts!r})' def __hash__(self): return hash(tuple(self.sorts)) def __eq__(self, other): - return super(MultipleSort, self).__eq__(other) and \ + return super().__eq__(other) and \ self.sorts == other.sorts @@ -878,14 +868,14 @@ class FieldSort(Sort): def key(item): field_val = item.get(self.field, '') - if self.case_insensitive and isinstance(field_val, six.text_type): + if self.case_insensitive and isinstance(field_val, str): field_val = field_val.lower() return field_val return sorted(objs, key=key, reverse=not self.ascending) def __repr__(self): - return '<{0}: {1}{2}>'.format( + return '<{}: {}{}>'.format( type(self).__name__, self.field, '+' if self.ascending else '-', @@ -895,7 +885,7 @@ class FieldSort(Sort): return hash((self.field, self.ascending)) def __eq__(self, other): - return super(FieldSort, self).__eq__(other) and \ + return super().__eq__(other) and \ self.field == other.field and \ self.ascending == other.ascending @@ -913,7 +903,7 @@ class FixedFieldSort(FieldSort): 'ELSE {0} END)'.format(self.field) else: field = self.field - return "{0} {1}".format(field, order) + return f"{field} {order}" class SlowFieldSort(FieldSort): diff --git a/libs/common/beets/dbcore/queryparse.py b/libs/common/beets/dbcore/queryparse.py index bc9cc77e..3bf02e4d 100644 --- a/libs/common/beets/dbcore/queryparse.py +++ b/libs/common/beets/dbcore/queryparse.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,12 +14,10 @@ """Parsing of strings into DBCore queries. """ -from __future__ import division, absolute_import, print_function import re import itertools from . import query -import beets PARSE_QUERY_PART_REGEX = re.compile( # Non-capturing optional segment for the keyword. 
@@ -89,7 +86,7 @@ def parse_query_part(part, query_classes={}, prefixes={}, assert match # Regex should always match negate = bool(match.group(1)) key = match.group(2) - term = match.group(3).replace('\:', ':') + term = match.group(3).replace('\\:', ':') # Check whether there's a prefix in the query and use the # corresponding query type. @@ -119,12 +116,13 @@ def construct_query_part(model_cls, prefixes, query_part): if not query_part: return query.TrueQuery() - # Use `model_cls` to build up a map from field names to `Query` - # classes. + # Use `model_cls` to build up a map from field (or query) names to + # `Query` classes. query_classes = {} for k, t in itertools.chain(model_cls._fields.items(), model_cls._types.items()): query_classes[k] = t.query + query_classes.update(model_cls._queries) # Non-field queries. # Parse the string. key, pattern, query_class, negate = \ @@ -137,26 +135,27 @@ def construct_query_part(model_cls, prefixes, query_part): # The query type matches a specific field, but none was # specified. So we use a version of the query that matches # any field. - q = query.AnyFieldQuery(pattern, model_cls._search_fields, - query_class) - if negate: - return query.NotQuery(q) - else: - return q + out_query = query.AnyFieldQuery(pattern, model_cls._search_fields, + query_class) else: # Non-field query type. - if negate: - return query.NotQuery(query_class(pattern)) - else: - return query_class(pattern) + out_query = query_class(pattern) - # Otherwise, this must be a `FieldQuery`. Use the field name to - # construct the query object. - key = key.lower() - q = query_class(key.lower(), pattern, key in model_cls._fields) + # Field queries get constructed according to the name of the field + # they are querying. + elif issubclass(query_class, query.FieldQuery): + key = key.lower() + out_query = query_class(key.lower(), pattern, key in model_cls._fields) + + # Non-field (named) query. + else: + out_query = query_class(pattern) + + # Apply negation. 
if negate: - return query.NotQuery(q) - return q + return query.NotQuery(out_query) + else: + return out_query def query_from_strings(query_cls, model_cls, prefixes, query_parts): @@ -172,11 +171,13 @@ def query_from_strings(query_cls, model_cls, prefixes, query_parts): return query_cls(subqueries) -def construct_sort_part(model_cls, part): +def construct_sort_part(model_cls, part, case_insensitive=True): """Create a `Sort` from a single string criterion. `model_cls` is the `Model` being queried. `part` is a single string - ending in ``+`` or ``-`` indicating the sort. + ending in ``+`` or ``-`` indicating the sort. `case_insensitive` + indicates whether or not the sort should be performed in a case + sensitive manner. """ assert part, "part must be a field name and + or -" field = part[:-1] @@ -185,7 +186,6 @@ def construct_sort_part(model_cls, part): assert direction in ('+', '-'), "part must end with + or -" is_ascending = direction == '+' - case_insensitive = beets.config['sort_case_insensitive'].get(bool) if field in model_cls._sorts: sort = model_cls._sorts[field](model_cls, is_ascending, case_insensitive) @@ -197,21 +197,23 @@ def construct_sort_part(model_cls, part): return sort -def sort_from_strings(model_cls, sort_parts): +def sort_from_strings(model_cls, sort_parts, case_insensitive=True): """Create a `Sort` from a list of sort criteria (strings). """ if not sort_parts: sort = query.NullSort() elif len(sort_parts) == 1: - sort = construct_sort_part(model_cls, sort_parts[0]) + sort = construct_sort_part(model_cls, sort_parts[0], case_insensitive) else: sort = query.MultipleSort() for part in sort_parts: - sort.add_sort(construct_sort_part(model_cls, part)) + sort.add_sort(construct_sort_part(model_cls, part, + case_insensitive)) return sort -def parse_sorted_query(model_cls, parts, prefixes={}): +def parse_sorted_query(model_cls, parts, prefixes={}, + case_insensitive=True): """Given a list of strings, create the `Query` and `Sort` that they represent. 
""" @@ -222,8 +224,8 @@ def parse_sorted_query(model_cls, parts, prefixes={}): # Split up query in to comma-separated subqueries, each representing # an AndQuery, which need to be joined together in one OrQuery subquery_parts = [] - for part in parts + [u',']: - if part.endswith(u','): + for part in parts + [',']: + if part.endswith(','): # Ensure we can catch "foo, bar" as well as "foo , bar" last_subquery_part = part[:-1] if last_subquery_part: @@ -237,8 +239,8 @@ def parse_sorted_query(model_cls, parts, prefixes={}): else: # Sort parts (1) end in + or -, (2) don't have a field, and # (3) consist of more than just the + or -. - if part.endswith((u'+', u'-')) \ - and u':' not in part \ + if part.endswith(('+', '-')) \ + and ':' not in part \ and len(part) > 1: sort_parts.append(part) else: @@ -246,5 +248,5 @@ def parse_sorted_query(model_cls, parts, prefixes={}): # Avoid needlessly wrapping single statements in an OR q = query.OrQuery(query_parts) if len(query_parts) > 1 else query_parts[0] - s = sort_from_strings(model_cls, sort_parts) + s = sort_from_strings(model_cls, sort_parts, case_insensitive) return q, s diff --git a/libs/common/beets/dbcore/types.py b/libs/common/beets/dbcore/types.py index b909904b..40f6a080 100644 --- a/libs/common/beets/dbcore/types.py +++ b/libs/common/beets/dbcore/types.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,25 +14,20 @@ """Representation of type information for DBCore model fields. """ -from __future__ import division, absolute_import, print_function from . import query from beets.util import str2bool -import six - -if not six.PY2: - buffer = memoryview # sqlite won't accept memoryview in python 2 # Abstract base. -class Type(object): +class Type: """An object encapsulating the type of a model field. Includes information about how to store, query, format, and parse a given field. """ - sql = u'TEXT' + sql = 'TEXT' """The SQLite column type for the value. 
""" @@ -41,7 +35,7 @@ class Type(object): """The `Query` subclass to be used when querying the field. """ - model_type = six.text_type + model_type = str """The Python type that is used to represent the value in the model. The model is guaranteed to return a value of this type if the field @@ -63,11 +57,11 @@ class Type(object): value = self.null # `self.null` might be `None` if value is None: - value = u'' + value = '' if isinstance(value, bytes): value = value.decode('utf-8', 'ignore') - return six.text_type(value) + return str(value) def parse(self, string): """Parse a (possibly human-written) string and return the @@ -97,16 +91,16 @@ class Type(object): For fixed fields the type of `value` is determined by the column type affinity given in the `sql` property and the SQL to Python mapping of the database adapter. For more information see: - http://www.sqlite.org/datatype3.html + https://www.sqlite.org/datatype3.html https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types Flexible fields have the type affinity `TEXT`. This means the - `sql_value` is either a `buffer`/`memoryview` or a `unicode` object` + `sql_value` is either a `memoryview` or a `unicode` object` and the method must handle these in addition. """ - if isinstance(sql_value, buffer): + if isinstance(sql_value, memoryview): sql_value = bytes(sql_value).decode('utf-8', 'ignore') - if isinstance(sql_value, six.text_type): + if isinstance(sql_value, str): return self.parse(sql_value) else: return self.normalize(sql_value) @@ -127,10 +121,18 @@ class Default(Type): class Integer(Type): """A basic integer type. 
""" - sql = u'INTEGER' + sql = 'INTEGER' query = query.NumericQuery model_type = int + def normalize(self, value): + try: + return self.model_type(round(float(value))) + except ValueError: + return self.null + except TypeError: + return self.null + class PaddedInt(Integer): """An integer field that is formatted with a given number of digits, @@ -140,19 +142,25 @@ class PaddedInt(Integer): self.digits = digits def format(self, value): - return u'{0:0{1}d}'.format(value or 0, self.digits) + return '{0:0{1}d}'.format(value or 0, self.digits) + + +class NullPaddedInt(PaddedInt): + """Same as `PaddedInt`, but does not normalize `None` to `0.0`. + """ + null = None class ScaledInt(Integer): """An integer whose formatting operation scales the number by a constant and adds a suffix. Good for units with large magnitudes. """ - def __init__(self, unit, suffix=u''): + def __init__(self, unit, suffix=''): self.unit = unit self.suffix = suffix def format(self, value): - return u'{0}{1}'.format((value or 0) // self.unit, self.suffix) + return '{}{}'.format((value or 0) // self.unit, self.suffix) class Id(Integer): @@ -163,18 +171,22 @@ class Id(Integer): def __init__(self, primary=True): if primary: - self.sql = u'INTEGER PRIMARY KEY' + self.sql = 'INTEGER PRIMARY KEY' class Float(Type): - """A basic floating-point type. + """A basic floating-point type. The `digits` parameter specifies how + many decimal places to use in the human-readable representation. """ - sql = u'REAL' + sql = 'REAL' query = query.NumericQuery model_type = float + def __init__(self, digits=1): + self.digits = digits + def format(self, value): - return u'{0:.1f}'.format(value or 0.0) + return '{0:.{1}f}'.format(value or 0, self.digits) class NullFloat(Float): @@ -186,19 +198,25 @@ class NullFloat(Float): class String(Type): """A Unicode string type. 
""" - sql = u'TEXT' + sql = 'TEXT' query = query.SubstringQuery + def normalize(self, value): + if value is None: + return self.null + else: + return self.model_type(value) + class Boolean(Type): """A boolean type. """ - sql = u'INTEGER' + sql = 'INTEGER' query = query.BooleanQuery model_type = bool def format(self, value): - return six.text_type(bool(value)) + return str(bool(value)) def parse(self, string): return str2bool(string) diff --git a/libs/common/beets/importer.py b/libs/common/beets/importer.py index 4e4084ee..561cedd2 100644 --- a/libs/common/beets/importer.py +++ b/libs/common/beets/importer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function """Provides the basic, interface-agnostic workflow for importing and autotagging music files. @@ -40,7 +38,7 @@ from beets import config from beets.util import pipeline, sorted_walk, ancestry, MoveOperation from beets.util import syspath, normpath, displayable_path from enum import Enum -from beets import mediafile +import mediafile action = Enum('action', ['SKIP', 'ASIS', 'TRACKS', 'APPLY', 'ALBUMS', 'RETAG']) @@ -75,7 +73,7 @@ def _open_state(): # unpickling, including ImportError. We use a catch-all # exception to avoid enumerating them all (the docs don't even have a # full list!). 
- log.debug(u'state file could not be read: {0}', exc) + log.debug('state file could not be read: {0}', exc) return {} @@ -84,8 +82,8 @@ def _save_state(state): try: with open(config['statefile'].as_filename(), 'wb') as f: pickle.dump(state, f) - except IOError as exc: - log.error(u'state file could not be written: {0}', exc) + except OSError as exc: + log.error('state file could not be written: {0}', exc) # Utilities for reading and writing the beets progress file, which @@ -174,10 +172,11 @@ def history_get(): # Abstract session class. -class ImportSession(object): +class ImportSession: """Controls an import action. Subclasses should implement methods to communicate with the user or otherwise make decisions. """ + def __init__(self, lib, loghandler, paths, query): """Create a session. `lib` is a Library object. `loghandler` is a logging.Handler. Either `paths` or `query` is non-null and indicates @@ -187,7 +186,7 @@ class ImportSession(object): self.logger = self._setup_logging(loghandler) self.paths = paths self.query = query - self._is_resuming = dict() + self._is_resuming = {} self._merged_items = set() self._merged_dirs = set() @@ -222,19 +221,31 @@ class ImportSession(object): iconfig['resume'] = False iconfig['incremental'] = False - # Copy, move, link, and hardlink are mutually exclusive. + if iconfig['reflink']: + iconfig['reflink'] = iconfig['reflink'] \ + .as_choice(['auto', True, False]) + + # Copy, move, reflink, link, and hardlink are mutually exclusive. 
if iconfig['move']: iconfig['copy'] = False iconfig['link'] = False iconfig['hardlink'] = False + iconfig['reflink'] = False elif iconfig['link']: iconfig['copy'] = False iconfig['move'] = False iconfig['hardlink'] = False + iconfig['reflink'] = False elif iconfig['hardlink']: iconfig['copy'] = False iconfig['move'] = False iconfig['link'] = False + iconfig['reflink'] = False + elif iconfig['reflink']: + iconfig['copy'] = False + iconfig['move'] = False + iconfig['link'] = False + iconfig['hardlink'] = False # Only delete when copying. if not iconfig['copy']: @@ -246,7 +257,7 @@ class ImportSession(object): """Log a message about a given album to the importer log. The status should reflect the reason the album couldn't be tagged. """ - self.logger.info(u'{0} {1}', status, displayable_path(paths)) + self.logger.info('{0} {1}', status, displayable_path(paths)) def log_choice(self, task, duplicate=False): """Logs the task's current choice if it should be logged. If @@ -257,17 +268,17 @@ class ImportSession(object): if duplicate: # Duplicate: log all three choices (skip, keep both, and trump). if task.should_remove_duplicates: - self.tag_log(u'duplicate-replace', paths) + self.tag_log('duplicate-replace', paths) elif task.choice_flag in (action.ASIS, action.APPLY): - self.tag_log(u'duplicate-keep', paths) + self.tag_log('duplicate-keep', paths) elif task.choice_flag is (action.SKIP): - self.tag_log(u'duplicate-skip', paths) + self.tag_log('duplicate-skip', paths) else: # Non-duplicate: log "skip" and "asis" choices. if task.choice_flag is action.ASIS: - self.tag_log(u'asis', paths) + self.tag_log('asis', paths) elif task.choice_flag is action.SKIP: - self.tag_log(u'skip', paths) + self.tag_log('skip', paths) def should_resume(self, path): raise NotImplementedError @@ -284,7 +295,7 @@ class ImportSession(object): def run(self): """Run the import task. 
""" - self.logger.info(u'import started {0}', time.asctime()) + self.logger.info('import started {0}', time.asctime()) self.set_config(config['import']) # Set up the pipeline. @@ -368,8 +379,8 @@ class ImportSession(object): """Mark paths and directories as merged for future reimport tasks. """ self._merged_items.update(paths) - dirs = set([os.path.dirname(path) if os.path.isfile(path) else path - for path in paths]) + dirs = {os.path.dirname(path) if os.path.isfile(path) else path + for path in paths} self._merged_dirs.update(dirs) def is_resuming(self, toppath): @@ -389,7 +400,7 @@ class ImportSession(object): # Either accept immediately or prompt for input to decide. if self.want_resume is True or \ self.should_resume(toppath): - log.warning(u'Resuming interrupted import of {0}', + log.warning('Resuming interrupted import of {0}', util.displayable_path(toppath)) self._is_resuming[toppath] = True else: @@ -399,11 +410,12 @@ class ImportSession(object): # The importer task class. -class BaseImportTask(object): +class BaseImportTask: """An abstract base class for importer tasks. Tasks flow through the importer pipeline. Each stage can update them. """ + def __init__(self, toppath, paths, items): """Create a task. The primary fields that define a task are: @@ -457,8 +469,9 @@ class ImportTask(BaseImportTask): * `finalize()` Update the import progress and cleanup the file system. 
""" + def __init__(self, toppath, paths, items): - super(ImportTask, self).__init__(toppath, paths, items) + super().__init__(toppath, paths, items) self.choice_flag = None self.cur_album = None self.cur_artist = None @@ -550,28 +563,34 @@ class ImportTask(BaseImportTask): def remove_duplicates(self, lib): duplicate_items = self.duplicate_items(lib) - log.debug(u'removing {0} old duplicated items', len(duplicate_items)) + log.debug('removing {0} old duplicated items', len(duplicate_items)) for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug(u'deleting duplicate {0}', + log.debug('deleting duplicate {0}', util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) - def set_fields(self): + def set_fields(self, lib): """Sets the fields given at CLI or configuration to the specified - values. + values, for both the album and all its items. """ + items = self.imported_items() for field, view in config['import']['set_fields'].items(): value = view.get() - log.debug(u'Set field {1}={2} for {0}', + log.debug('Set field {1}={2} for {0}', displayable_path(self.paths), field, value) self.album[field] = value - self.album.store() + for item in items: + item[field] = value + with lib.transaction(): + for item in items: + item.store() + self.album.store() def finalize(self, session): """Save progress, clean up files, and emit plugin event. @@ -655,7 +674,7 @@ class ImportTask(BaseImportTask): return [] duplicates = [] - task_paths = set(i.path for i in self.items if i) + task_paths = {i.path for i in self.items if i} duplicate_query = dbcore.AndQuery(( dbcore.MatchQuery('albumartist', artist), dbcore.MatchQuery('album', album), @@ -665,7 +684,7 @@ class ImportTask(BaseImportTask): # Check whether the album paths are all present in the task # i.e. album is being completely re-imported by the task, # in which case it is not a duplicate (will be replaced). 
- album_paths = set(i.path for i in album.items()) + album_paths = {i.path for i in album.items()} if not (album_paths <= task_paths): duplicates.append(album) return duplicates @@ -707,7 +726,7 @@ class ImportTask(BaseImportTask): item.update(changes) def manipulate_files(self, operation=None, write=False, session=None): - """ Copy, move, link or hardlink (depending on `operation`) the files + """ Copy, move, link, hardlink or reflink (depending on `operation`) the files as well as write metadata. `operation` should be an instance of `util.MoveOperation`. @@ -754,6 +773,8 @@ class ImportTask(BaseImportTask): self.record_replaced(lib) self.remove_replaced(lib) self.album = lib.add_album(self.imported_items()) + if 'data_source' in self.imported_items()[0]: + self.album.data_source = self.imported_items()[0].data_source self.reimport_metadata(lib) def record_replaced(self, lib): @@ -772,7 +793,7 @@ class ImportTask(BaseImportTask): if (not dup_item.album_id or dup_item.album_id in replaced_album_ids): continue - replaced_album = dup_item.get_album() + replaced_album = dup_item._cached_album if replaced_album: replaced_album_ids.add(dup_item.album_id) self.replaced_albums[replaced_album.path] = replaced_album @@ -789,8 +810,8 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - u'Reimported album: added {0}, flexible ' - u'attributes {1} from album {2} for {3}', + 'Reimported album: added {0}, flexible ' + 'attributes {1} from album {2} for {3}', self.album.added, replaced_album._values_flex.keys(), replaced_album.id, @@ -803,16 +824,16 @@ class ImportTask(BaseImportTask): if dup_item.added and dup_item.added != item.added: item.added = dup_item.added log.debug( - u'Reimported item added {0} ' - u'from item {1} for {2}', + 'Reimported item added {0} ' + 'from item {1} for {2}', item.added, dup_item.id, displayable_path(item.path) ) item.update(dup_item._values_flex) log.debug( - u'Reimported item flexible 
attributes {0} ' - u'from item {1} for {2}', + 'Reimported item flexible attributes {0} ' + 'from item {1} for {2}', dup_item._values_flex.keys(), dup_item.id, displayable_path(item.path) @@ -825,10 +846,10 @@ class ImportTask(BaseImportTask): """ for item in self.imported_items(): for dup_item in self.replaced_items[item]: - log.debug(u'Replacing item {0}: {1}', + log.debug('Replacing item {0}: {1}', dup_item.id, displayable_path(item.path)) dup_item.remove() - log.debug(u'{0} of {1} items replaced', + log.debug('{0} of {1} items replaced', sum(bool(l) for l in self.replaced_items.values()), len(self.imported_items())) @@ -866,7 +887,7 @@ class SingletonImportTask(ImportTask): """ def __init__(self, toppath, item): - super(SingletonImportTask, self).__init__(toppath, [item.path], [item]) + super().__init__(toppath, [item.path], [item]) self.item = item self.is_album = False self.paths = [item.path] @@ -932,13 +953,13 @@ class SingletonImportTask(ImportTask): def reload(self): self.item.load() - def set_fields(self): + def set_fields(self, lib): """Sets the fields given at CLI or configuration to the specified - values. + values, for the singleton item. 
""" for field, view in config['import']['set_fields'].items(): value = view.get() - log.debug(u'Set field {1}={2} for {0}', + log.debug('Set field {1}={2} for {0}', displayable_path(self.paths), field, value) @@ -959,7 +980,7 @@ class SentinelImportTask(ImportTask): """ def __init__(self, toppath, paths): - super(SentinelImportTask, self).__init__(toppath, paths, ()) + super().__init__(toppath, paths, ()) # TODO Remove the remaining attributes eventually self.should_remove_duplicates = False self.is_album = True @@ -1003,7 +1024,7 @@ class ArchiveImportTask(SentinelImportTask): """ def __init__(self, toppath): - super(ArchiveImportTask, self).__init__(toppath, ()) + super().__init__(toppath, ()) self.extracted = False @classmethod @@ -1032,14 +1053,20 @@ class ArchiveImportTask(SentinelImportTask): cls._handlers = [] from zipfile import is_zipfile, ZipFile cls._handlers.append((is_zipfile, ZipFile)) - from tarfile import is_tarfile, TarFile - cls._handlers.append((is_tarfile, TarFile)) + import tarfile + cls._handlers.append((tarfile.is_tarfile, tarfile.open)) try: from rarfile import is_rarfile, RarFile except ImportError: pass else: cls._handlers.append((is_rarfile, RarFile)) + try: + from py7zr import is_7zfile, SevenZipFile + except ImportError: + pass + else: + cls._handlers.append((is_7zfile, SevenZipFile)) return cls._handlers @@ -1047,7 +1074,7 @@ class ArchiveImportTask(SentinelImportTask): """Removes the temporary directory the archive was extracted to. 
""" if self.extracted: - log.debug(u'Removing extracted directory: {0}', + log.debug('Removing extracted directory: {0}', displayable_path(self.toppath)) shutil.rmtree(self.toppath) @@ -1059,9 +1086,9 @@ class ArchiveImportTask(SentinelImportTask): if path_test(util.py3_path(self.toppath)): break + extract_to = mkdtemp() + archive = handler_class(util.py3_path(self.toppath), mode='r') try: - extract_to = mkdtemp() - archive = handler_class(util.py3_path(self.toppath), mode='r') archive.extractall(extract_to) finally: archive.close() @@ -1069,10 +1096,11 @@ class ArchiveImportTask(SentinelImportTask): self.toppath = extract_to -class ImportTaskFactory(object): +class ImportTaskFactory: """Generate album and singleton import tasks for all media files indicated by a path. """ + def __init__(self, toppath, session): """Create a new task factory. @@ -1110,14 +1138,12 @@ class ImportTaskFactory(object): if self.session.config['singletons']: for path in paths: tasks = self._create(self.singleton(path)) - for task in tasks: - yield task + yield from tasks yield self.sentinel(dirs) else: tasks = self._create(self.album(paths, dirs)) - for task in tasks: - yield task + yield from tasks # Produce the final sentinel for this toppath to indicate that # it is finished. This is usually just a SentinelImportTask, but @@ -1165,7 +1191,7 @@ class ImportTaskFactory(object): """Return a `SingletonImportTask` for the music file. 
""" if self.session.already_imported(self.toppath, [path]): - log.debug(u'Skipping previously-imported path: {0}', + log.debug('Skipping previously-imported path: {0}', displayable_path(path)) self.skipped += 1 return None @@ -1186,10 +1212,10 @@ class ImportTaskFactory(object): return None if dirs is None: - dirs = list(set(os.path.dirname(p) for p in paths)) + dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug(u'Skipping previously-imported path: {0}', + log.debug('Skipping previously-imported path: {0}', displayable_path(dirs)) self.skipped += 1 return None @@ -1219,22 +1245,22 @@ class ImportTaskFactory(object): if not (self.session.config['move'] or self.session.config['copy']): - log.warning(u"Archive importing requires either " - u"'copy' or 'move' to be enabled.") + log.warning("Archive importing requires either " + "'copy' or 'move' to be enabled.") return - log.debug(u'Extracting archive: {0}', + log.debug('Extracting archive: {0}', displayable_path(self.toppath)) archive_task = ArchiveImportTask(self.toppath) try: archive_task.extract() except Exception as exc: - log.error(u'extraction failed: {0}', exc) + log.error('extraction failed: {0}', exc) return # Now read albums from the extracted directory. self.toppath = archive_task.toppath - log.debug(u'Archive extracted to: {0}', self.toppath) + log.debug('Archive extracted to: {0}', self.toppath) return archive_task def read_item(self, path): @@ -1250,9 +1276,9 @@ class ImportTaskFactory(object): # Silently ignore non-music files. pass elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warning(u'unreadable file: {0}', displayable_path(path)) + log.warning('unreadable file: {0}', displayable_path(path)) else: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(path), exc) @@ -1291,17 +1317,16 @@ def read_tasks(session): # Generate tasks. 
task_factory = ImportTaskFactory(toppath, session) - for t in task_factory.tasks(): - yield t + yield from task_factory.tasks() skipped += task_factory.skipped if not task_factory.imported: - log.warning(u'No files imported from {0}', + log.warning('No files imported from {0}', displayable_path(toppath)) # Show skipped directories (due to incremental/resume). if skipped: - log.info(u'Skipped {0} paths.', skipped) + log.info('Skipped {0} paths.', skipped) def query_tasks(session): @@ -1319,7 +1344,7 @@ def query_tasks(session): else: # Search for albums. for album in session.lib.albums(session.query): - log.debug(u'yielding album {0}: {1} - {2}', + log.debug('yielding album {0}: {1} - {2}', album.id, album.albumartist, album.album) items = list(album.items()) _freshen_items(items) @@ -1342,7 +1367,7 @@ def lookup_candidates(session, task): return plugins.send('import_task_start', session=session, task=task) - log.debug(u'Looking up: {0}', displayable_path(task.paths)) + log.debug('Looking up: {0}', displayable_path(task.paths)) # Restrict the initial lookup to IDs specified by the user via the -m # option. Currently all the IDs are passed onto the tasks directly. @@ -1381,8 +1406,7 @@ def user_query(session, task): def emitter(task): for item in task.items: task = SingletonImportTask(task.toppath, item) - for new_task in task.handle_created(session): - yield new_task + yield from task.handle_created(session) yield SentinelImportTask(task.toppath, task.paths) return _extend_pipeline(emitter(task), @@ -1428,30 +1452,30 @@ def resolve_duplicates(session, task): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug(u'found duplicates: {}'.format( + log.debug('found duplicates: {}'.format( [o.id for o in found_duplicates] )) # Get the default action to follow from config. 
duplicate_action = config['import']['duplicate_action'].as_choice({ - u'skip': u's', - u'keep': u'k', - u'remove': u'r', - u'merge': u'm', - u'ask': u'a', + 'skip': 's', + 'keep': 'k', + 'remove': 'r', + 'merge': 'm', + 'ask': 'a', }) - log.debug(u'default action for duplicates: {0}', duplicate_action) + log.debug('default action for duplicates: {0}', duplicate_action) - if duplicate_action == u's': + if duplicate_action == 's': # Skip new. task.set_choice(action.SKIP) - elif duplicate_action == u'k': + elif duplicate_action == 'k': # Keep both. Do nothing; leave the choice intact. pass - elif duplicate_action == u'r': + elif duplicate_action == 'r': # Remove old. task.should_remove_duplicates = True - elif duplicate_action == u'm': + elif duplicate_action == 'm': # Merge duplicates together task.should_merge_duplicates = True else: @@ -1471,7 +1495,7 @@ def import_asis(session, task): if task.skip: return - log.info(u'{}', displayable_path(task.paths)) + log.info('{}', displayable_path(task.paths)) task.set_choice(action.ASIS) apply_choice(session, task) @@ -1496,7 +1520,7 @@ def apply_choice(session, task): # because then the ``ImportTask`` won't have an `album` for which # it can set the fields. if config['import']['set_fields']: - task.set_fields() + task.set_fields(session.lib) @pipeline.mutator_stage @@ -1534,6 +1558,8 @@ def manipulate_files(session, task): operation = MoveOperation.LINK elif session.config['hardlink']: operation = MoveOperation.HARDLINK + elif session.config['reflink']: + operation = MoveOperation.REFLINK else: operation = None @@ -1552,11 +1578,11 @@ def log_files(session, task): """A coroutine (pipeline stage) to log each file to be imported. 
""" if isinstance(task, SingletonImportTask): - log.info(u'Singleton: {0}', displayable_path(task.item['path'])) + log.info('Singleton: {0}', displayable_path(task.item['path'])) elif task.items: - log.info(u'Album: {0}', displayable_path(task.paths[0])) + log.info('Album: {0}', displayable_path(task.paths[0])) for item in task.items: - log.info(u' {0}', displayable_path(item['path'])) + log.info(' {0}', displayable_path(item['path'])) def group_albums(session): diff --git a/libs/common/beets/library.py b/libs/common/beets/library.py index ba57407d..888836cd 100644 --- a/libs/common/beets/library.py +++ b/libs/common/beets/library.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,34 +14,30 @@ """The core data store and collection logic for beets. """ -from __future__ import division, absolute_import, print_function import os import sys import unicodedata import time import re -import six +import string +import shlex from beets import logging -from beets.mediafile import MediaFile, UnreadableFileError +from mediafile import MediaFile, UnreadableFileError from beets import plugins from beets import util from beets.util import bytestring_path, syspath, normpath, samefile, \ - MoveOperation -from beets.util.functemplate import Template + MoveOperation, lazy_property +from beets.util.functemplate import template, Template from beets import dbcore from beets.dbcore import types import beets # To use the SQLite "blob" type, it doesn't suffice to provide a byte -# string; SQLite treats that as encoded text. Wrapping it in a `buffer` or a -# `memoryview`, depending on the Python version, tells it that we -# actually mean non-text data. -if six.PY2: - BLOB_TYPE = buffer # noqa: F821 -else: - BLOB_TYPE = memoryview +# string; SQLite treats that as encoded text. Wrapping it in a +# `memoryview` tells it that we actually mean non-text data. 
+BLOB_TYPE = memoryview log = logging.getLogger('beets') @@ -64,7 +59,7 @@ class PathQuery(dbcore.FieldQuery): `case_sensitive` can be a bool or `None`, indicating that the behavior should depend on the filesystem. """ - super(PathQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) # By default, the case sensitivity depends on the filesystem # that the query path is located on. @@ -149,7 +144,7 @@ class PathType(types.Type): `bytes` objects, in keeping with the Unix filesystem abstraction. """ - sql = u'BLOB' + sql = 'BLOB' query = PathQuery model_type = bytes @@ -173,7 +168,7 @@ class PathType(types.Type): return normpath(bytestring_path(string)) def normalize(self, value): - if isinstance(value, six.text_type): + if isinstance(value, str): # Paths stored internally as encoded bytes. return bytestring_path(value) @@ -251,6 +246,7 @@ class SmartArtistSort(dbcore.query.Sort): """Sort by artist (either album artist or track artist), prioritizing the sort field over the raw field. """ + def __init__(self, model_cls, ascending=True, case_insensitive=True): self.album = model_cls is Album self.ascending = ascending @@ -266,12 +262,15 @@ class SmartArtistSort(dbcore.query.Sort): def sort(self, objs): if self.album: - field = lambda a: a.albumartist_sort or a.albumartist + def field(a): + return a.albumartist_sort or a.albumartist else: - field = lambda i: i.artist_sort or i.artist + def field(i): + return i.artist_sort or i.artist if self.case_insensitive: - key = lambda x: field(x).lower() + def key(x): + return field(x).lower() else: key = field return sorted(objs, key=key, reverse=not self.ascending) @@ -282,17 +281,17 @@ PF_KEY_DEFAULT = 'default' # Exceptions. -@six.python_2_unicode_compatible class FileOperationError(Exception): """Indicates an error when interacting with a file on disk. Possibilities include an unsupported media type, a permissions error, and an unhandled Mutagen exception. 
""" + def __init__(self, path, reason): """Create an exception describing an operation on the file at `path` with the underlying (chained) exception `reason`. """ - super(FileOperationError, self).__init__(path, reason) + super().__init__(path, reason) self.path = path self.reason = reason @@ -300,9 +299,9 @@ class FileOperationError(Exception): """Get a string representing the error. Describes both the underlying reason and the file path in question. """ - return u'{0}: {1}'.format( + return '{}: {}'.format( util.displayable_path(self.path), - six.text_type(self.reason) + str(self.reason) ) # define __str__ as text to avoid infinite loop on super() calls @@ -310,25 +309,24 @@ class FileOperationError(Exception): __str__ = text -@six.python_2_unicode_compatible class ReadError(FileOperationError): """An error while reading a file (i.e. in `Item.read`). """ + def __str__(self): - return u'error reading ' + super(ReadError, self).text() + return 'error reading ' + super().text() -@six.python_2_unicode_compatible class WriteError(FileOperationError): """An error while writing a file (i.e. in `Item.write`). """ + def __str__(self): - return u'error writing ' + super(WriteError, self).text() + return 'error writing ' + super().text() # Item and Album model classes. -@six.python_2_unicode_compatible class LibModel(dbcore.Model): """Shared concrete functionality for Items and Albums. 
""" @@ -343,21 +341,21 @@ class LibModel(dbcore.Model): return funcs def store(self, fields=None): - super(LibModel, self).store(fields) + super().store(fields) plugins.send('database_change', lib=self._db, model=self) def remove(self): - super(LibModel, self).remove() + super().remove() plugins.send('database_change', lib=self._db, model=self) def add(self, lib=None): - super(LibModel, self).add(lib) + super().add(lib) plugins.send('database_change', lib=self._db, model=self) def __format__(self, spec): if not spec: spec = beets.config[self._format_config_key].as_str() - assert isinstance(spec, six.text_type) + assert isinstance(spec, str) return self.evaluate_template(spec) def __str__(self): @@ -373,15 +371,42 @@ class FormattedItemMapping(dbcore.db.FormattedMapping): Album-level fields take precedence if `for_path` is true. """ - def __init__(self, item, for_path=False): - super(FormattedItemMapping, self).__init__(item, for_path) - self.album = item.get_album() - self.album_keys = [] + ALL_KEYS = '*' + + def __init__(self, item, included_keys=ALL_KEYS, for_path=False): + # We treat album and item keys specially here, + # so exclude transitive album keys from the model's keys. + super().__init__(item, included_keys=[], + for_path=for_path) + self.included_keys = included_keys + if included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. + self.model_keys = item.keys(computed=True, with_album=False) + else: + self.model_keys = included_keys + self.item = item + + @lazy_property + def all_keys(self): + return set(self.model_keys).union(self.album_keys) + + @lazy_property + def album_keys(self): + album_keys = [] if self.album: - for key in self.album.keys(True): - if key in Album.item_keys or key not in item._fields.keys(): - self.album_keys.append(key) - self.all_keys = set(self.model_keys).union(self.album_keys) + if self.included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. 
+ for key in self.album.keys(computed=True): + if key in Album.item_keys \ + or key not in self.item._fields.keys(): + album_keys.append(key) + else: + album_keys = self.included_keys + return album_keys + + @property + def album(self): + return self.item._cached_album def _get(self, key): """Get the value for a key, either from the album or the item. @@ -397,19 +422,23 @@ class FormattedItemMapping(dbcore.db.FormattedMapping): raise KeyError(key) def __getitem__(self, key): - """Get the value for a key. Certain unset values are remapped. + """Get the value for a key. `artist` and `albumartist` + are fallback values for each other when not set. """ value = self._get(key) # `artist` and `albumartist` fields fall back to one another. # This is helpful in path formats when the album artist is unset # on as-is imports. - if key == 'artist' and not value: - return self._get('albumartist') - elif key == 'albumartist' and not value: - return self._get('artist') - else: - return value + try: + if key == 'artist' and not value: + return self._get('albumartist') + elif key == 'albumartist' and not value: + return self._get('artist') + except KeyError: + pass + + return value def __iter__(self): return iter(self.all_keys) @@ -422,74 +451,85 @@ class Item(LibModel): _table = 'items' _flex_table = 'item_attributes' _fields = { - 'id': types.PRIMARY_ID, - 'path': PathType(), + 'id': types.PRIMARY_ID, + 'path': PathType(), 'album_id': types.FOREIGN_ID, - 'title': types.STRING, - 'artist': types.STRING, - 'artist_sort': types.STRING, - 'artist_credit': types.STRING, - 'album': types.STRING, - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, - 'albumartist_credit': types.STRING, - 'genre': types.STRING, - 'lyricist': types.STRING, - 'composer': types.STRING, - 'composer_sort': types.STRING, - 'arranger': types.STRING, - 'grouping': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'track': types.PaddedInt(2), - 
'tracktotal': types.PaddedInt(2), - 'disc': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'lyrics': types.STRING, - 'comments': types.STRING, - 'bpm': types.INTEGER, - 'comp': types.BOOLEAN, - 'mb_trackid': types.STRING, - 'mb_albumid': types.STRING, - 'mb_artistid': types.STRING, - 'mb_albumartistid': types.STRING, - 'mb_releasetrackid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, + 'title': types.STRING, + 'artist': types.STRING, + 'artist_sort': types.STRING, + 'artist_credit': types.STRING, + 'album': types.STRING, + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, + 'albumartist_credit': types.STRING, + 'genre': types.STRING, + 'style': types.STRING, + 'discogs_albumid': types.INTEGER, + 'discogs_artistid': types.INTEGER, + 'discogs_labelid': types.INTEGER, + 'lyricist': types.STRING, + 'composer': types.STRING, + 'composer_sort': types.STRING, + 'work': types.STRING, + 'mb_workid': types.STRING, + 'work_disambig': types.STRING, + 'arranger': types.STRING, + 'grouping': types.STRING, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'track': types.PaddedInt(2), + 'tracktotal': types.PaddedInt(2), + 'disc': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'lyrics': types.STRING, + 'comments': types.STRING, + 'bpm': types.INTEGER, + 'comp': types.BOOLEAN, + 'mb_trackid': types.STRING, + 'mb_albumid': types.STRING, + 'mb_artistid': types.STRING, + 'mb_albumartistid': types.STRING, + 'mb_releasetrackid': types.STRING, + 'trackdisambig': types.STRING, + 'albumtype': types.STRING, + 'albumtypes': types.STRING, + 'label': types.STRING, 'acoustid_fingerprint': types.STRING, - 'acoustid_id': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'media': types.STRING, - 'albumdisambig': types.STRING, - 
'disctitle': types.STRING, - 'encoder': types.STRING, - 'rg_track_gain': types.NULL_FLOAT, - 'rg_track_peak': types.NULL_FLOAT, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'r128_track_gain': types.PaddedInt(6), - 'r128_album_gain': types.PaddedInt(6), - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), - 'initial_key': MusicalKey(), + 'acoustid_id': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'isrc': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'media': types.STRING, + 'albumdisambig': types.STRING, + 'releasegroupdisambig': types.STRING, + 'disctitle': types.STRING, + 'encoder': types.STRING, + 'rg_track_gain': types.NULL_FLOAT, + 'rg_track_peak': types.NULL_FLOAT, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'r128_track_gain': types.NullPaddedInt(6), + 'r128_album_gain': types.NullPaddedInt(6), + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), + 'initial_key': MusicalKey(), - 'length': DurationType(), - 'bitrate': types.ScaledInt(1000, u'kbps'), - 'format': types.STRING, - 'samplerate': types.ScaledInt(1000, u'kHz'), - 'bitdepth': types.INTEGER, - 'channels': types.INTEGER, - 'mtime': DateType(), - 'added': DateType(), + 'length': DurationType(), + 'bitrate': types.ScaledInt(1000, 'kbps'), + 'format': types.STRING, + 'samplerate': types.ScaledInt(1000, 'kHz'), + 'bitdepth': types.INTEGER, + 'channels': types.INTEGER, + 'mtime': DateType(), + 'added': DateType(), } _search_fields = ('artist', 'title', 'comments', @@ -522,6 +562,29 @@ class Item(LibModel): _format_config_key = 'format_item' + __album = None + """Cached album object. 
Read-only.""" + + @property + def _cached_album(self): + """The Album object that this item belongs to, if any, or + None if the item is a singleton or is not associated with a + library. + The instance is cached and refreshed on access. + + DO NOT MODIFY! + If you want a copy to modify, use :meth:`get_album`. + """ + if not self.__album and self._db: + self.__album = self._db.get_album(self) + elif self.__album: + self.__album.load() + return self.__album + + @_cached_album.setter + def _cached_album(self, album): + self.__album = album + @classmethod def _getters(cls): getters = plugins.item_field_getters() @@ -544,27 +607,72 @@ class Item(LibModel): """ # Encode unicode paths and read buffers. if key == 'path': - if isinstance(value, six.text_type): + if isinstance(value, str): value = bytestring_path(value) elif isinstance(value, BLOB_TYPE): value = bytes(value) + elif key == 'album_id': + self._cached_album = None - changed = super(Item, self)._setitem(key, value) + changed = super()._setitem(key, value) if changed and key in MediaFile.fields(): self.mtime = 0 # Reset mtime on dirty. + def __getitem__(self, key): + """Get the value for a field, falling back to the album if + necessary. Raise a KeyError if the field is not available. + """ + try: + return super().__getitem__(key) + except KeyError: + if self._cached_album: + return self._cached_album[key] + raise + + def __repr__(self): + # This must not use `with_album=True`, because that might access + # the database. When debugging, that is not guaranteed to succeed, and + # can even deadlock due to the database lock. + return '{}({})'.format( + type(self).__name__, + ', '.join('{}={!r}'.format(k, self[k]) + for k in self.keys(with_album=False)), + ) + + def keys(self, computed=False, with_album=True): + """Get a list of available field names. `with_album` + controls whether the album's fields are included. 
+ """ + keys = super().keys(computed=computed) + if with_album and self._cached_album: + keys = set(keys) + keys.update(self._cached_album.keys(computed=computed)) + keys = list(keys) + return keys + + def get(self, key, default=None, with_album=True): + """Get the value for a given key or `default` if it does not + exist. Set `with_album` to false to skip album fallback. + """ + try: + return self._get(key, default, raise_=with_album) + except KeyError: + if self._cached_album: + return self._cached_album.get(key, default) + return default + def update(self, values): """Set all key/value pairs in the mapping. If mtime is specified, it is not reset (as it might otherwise be). """ - super(Item, self).update(values) + super().update(values) if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] def clear(self): """Set all key/value pairs to None.""" - for key in self._media_fields: + for key in self._media_tag_fields: setattr(self, key, None) def get_album(self): @@ -598,7 +706,7 @@ class Item(LibModel): for key in self._media_fields: value = getattr(mediafile, key) - if isinstance(value, six.integer_types): + if isinstance(value, int): if value.bit_length() > 63: value = 0 self[key] = value @@ -609,7 +717,7 @@ class Item(LibModel): self.path = read_path - def write(self, path=None, tags=None): + def write(self, path=None, tags=None, id3v23=None): """Write the item's metadata to a media file. All fields in `_media_fields` are written to disk according to @@ -621,6 +729,9 @@ class Item(LibModel): `tags` is a dictionary of additional metadata the should be written to the file. (These tags need not be in `_media_fields`.) + `id3v23` will override the global `id3v23` config option if it is + set to something other than `None`. + Can raise either a `ReadError` or a `WriteError`. 
""" if path is None: @@ -628,6 +739,9 @@ class Item(LibModel): else: path = normpath(path) + if id3v23 is None: + id3v23 = beets.config['id3v23'].get(bool) + # Get the data to write to the file. item_tags = dict(self) item_tags = {k: v for k, v in item_tags.items() @@ -638,8 +752,7 @@ class Item(LibModel): # Open the file. try: - mediafile = MediaFile(syspath(path), - id3v23=beets.config['id3v23'].get(bool)) + mediafile = MediaFile(syspath(path), id3v23=id3v23) except UnreadableFileError as exc: raise ReadError(path, exc) @@ -655,17 +768,17 @@ class Item(LibModel): self.mtime = self.current_mtime() plugins.send('after_write', item=self, path=path) - def try_write(self, path=None, tags=None): + def try_write(self, *args, **kwargs): """Calls `write()` but catches and logs `FileOperationError` exceptions. Returns `False` an exception was caught and `True` otherwise. """ try: - self.write(path, tags) + self.write(*args, **kwargs) return True except FileOperationError as exc: - log.error(u"{0}", exc) + log.error("{0}", exc) return False def try_sync(self, write, move, with_album=True): @@ -685,7 +798,7 @@ class Item(LibModel): if move: # Check whether this file is inside the library directory. 
if self._db and self._db.directory in util.ancestry(self.path): - log.debug(u'moving {0} to synchronize path', + log.debug('moving {0} to synchronize path', util.displayable_path(self.path)) self.move(with_album=with_album) self.store() @@ -720,6 +833,16 @@ class Item(LibModel): util.hardlink(self.path, dest) plugins.send("item_hardlinked", item=self, source=self.path, destination=dest) + elif operation == MoveOperation.REFLINK: + util.reflink(self.path, dest, fallback=False) + plugins.send("item_reflinked", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.REFLINK_AUTO: + util.reflink(self.path, dest, fallback=True) + plugins.send("item_reflinked", item=self, source=self.path, + destination=dest) + else: + assert False, 'unknown MoveOperation' # Either copying or moving succeeded, so update the stored path. self.path = dest @@ -738,7 +861,7 @@ class Item(LibModel): try: return os.path.getsize(syspath(self.path)) except (OSError, Exception) as exc: - log.warning(u'could not get filesize: {0}', exc) + log.warning('could not get filesize: {0}', exc) return 0 # Model methods. @@ -748,7 +871,7 @@ class Item(LibModel): removed from disk. If `with_album`, then the item's album (if any) is removed if it the item was the last in the album. """ - super(Item, self).remove() + super().remove() # Remove the album if it is empty. if with_album: @@ -815,7 +938,7 @@ class Item(LibModel): # Templating. def destination(self, fragment=False, basedir=None, platform=None, - path_formats=None): + path_formats=None, replacements=None): """Returns the path in the library directory designated for the item (i.e., where the file ought to be). 
fragment makes this method return just the path fragment underneath the root library @@ -827,6 +950,8 @@ class Item(LibModel): platform = platform or sys.platform basedir = basedir or self._db.directory path_formats = path_formats or self._db.path_formats + if replacements is None: + replacements = self._db.replacements # Use a path format based on a query, falling back on the # default. @@ -844,11 +969,11 @@ class Item(LibModel): if query == PF_KEY_DEFAULT: break else: - assert False, u"no default path format" + assert False, "no default path format" if isinstance(path_format, Template): subpath_tmpl = path_format else: - subpath_tmpl = Template(path_format) + subpath_tmpl = template(path_format) # Evaluate the selected template. subpath = self.evaluate_template(subpath_tmpl, True) @@ -871,16 +996,16 @@ class Item(LibModel): maxlen = util.max_filename_length(self._db.directory) subpath, fellback = util.legalize_path( - subpath, self._db.replacements, maxlen, + subpath, replacements, maxlen, os.path.splitext(self.path)[1], fragment ) if fellback: # Print an error message if legalization fell back to # default replacements because of the maximum length. log.warning( - u'Fell back to default replacements when naming ' - u'file {}. Configure replacements to avoid lengthening ' - u'the filename.', + 'Fell back to default replacements when naming ' + 'file {}. 
Configure replacements to avoid lengthening ' + 'the filename.', subpath ) @@ -899,44 +1024,50 @@ class Album(LibModel): _flex_table = 'album_attributes' _always_dirty = True _fields = { - 'id': types.PRIMARY_ID, + 'id': types.PRIMARY_ID, 'artpath': PathType(True), - 'added': DateType(), + 'added': DateType(), - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, 'albumartist_credit': types.STRING, - 'album': types.STRING, - 'genre': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'comp': types.BOOLEAN, - 'mb_albumid': types.STRING, - 'mb_albumartistid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'albumdisambig': types.STRING, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'r128_album_gain': types.PaddedInt(6), - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), + 'album': types.STRING, + 'genre': types.STRING, + 'style': types.STRING, + 'discogs_albumid': types.INTEGER, + 'discogs_artistid': types.INTEGER, + 'discogs_labelid': types.INTEGER, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'comp': types.BOOLEAN, + 'mb_albumid': types.STRING, + 'mb_albumartistid': types.STRING, + 'albumtype': types.STRING, + 'albumtypes': types.STRING, + 'label': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'albumdisambig': types.STRING, + 
'releasegroupdisambig': types.STRING, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'r128_album_gain': types.NullPaddedInt(6), + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), } _search_fields = ('album', 'albumartist', 'genre') _types = { - 'path': PathType(), + 'path': PathType(), 'data_source': types.STRING, } @@ -952,6 +1083,10 @@ class Album(LibModel): 'albumartist_credit', 'album', 'genre', + 'style', + 'discogs_albumid', + 'discogs_artistid', + 'discogs_labelid', 'year', 'month', 'day', @@ -960,6 +1095,7 @@ class Album(LibModel): 'mb_albumid', 'mb_albumartistid', 'albumtype', + 'albumtypes', 'label', 'mb_releasegroupid', 'asin', @@ -969,6 +1105,7 @@ class Album(LibModel): 'country', 'albumstatus', 'albumdisambig', + 'releasegroupdisambig', 'rg_album_gain', 'rg_album_peak', 'r128_album_gain', @@ -1003,7 +1140,10 @@ class Album(LibModel): containing the album are also removed (recursively) if empty. Set with_items to False to avoid removing the album's items. """ - super(Album, self).remove() + super().remove() + + # Send a 'album_removed' signal to plugins + plugins.send('album_removed', album=self) # Delete art file. 
if delete: @@ -1027,12 +1167,18 @@ class Album(LibModel): if not old_art: return + if not os.path.exists(old_art): + log.error('removing reference to missing album art file {}', + util.displayable_path(old_art)) + self.artpath = None + return + new_art = self.art_destination(old_art) if new_art == old_art: return new_art = util.unique_path(new_art) - log.debug(u'moving album art {0} to {1}', + log.debug('moving album art {0} to {1}', util.displayable_path(old_art), util.displayable_path(new_art)) if operation == MoveOperation.MOVE: @@ -1044,6 +1190,12 @@ class Album(LibModel): util.link(old_art, new_art) elif operation == MoveOperation.HARDLINK: util.hardlink(old_art, new_art) + elif operation == MoveOperation.REFLINK: + util.reflink(old_art, new_art, fallback=False) + elif operation == MoveOperation.REFLINK_AUTO: + util.reflink(old_art, new_art, fallback=True) + else: + assert False, 'unknown MoveOperation' self.artpath = new_art def move(self, operation=MoveOperation.MOVE, basedir=None, store=True): @@ -1083,7 +1235,7 @@ class Album(LibModel): """ item = self.items().get() if not item: - raise ValueError(u'empty album') + raise ValueError('empty album for album id %d' % self.id) return os.path.dirname(item.path) def _albumtotal(self): @@ -1119,7 +1271,7 @@ class Album(LibModel): image = bytestring_path(image) item_dir = item_dir or self.item_dir() - filename_tmpl = Template( + filename_tmpl = template( beets.config['art_filename'].as_str()) subpath = self.evaluate_template(filename_tmpl, True) if beets.config['asciify_paths']: @@ -1180,7 +1332,7 @@ class Album(LibModel): track_updates[key] = self[key] with self._db.transaction(): - super(Album, self).store(fields) + super().store(fields) if track_updates: for item in self.items(): for key, value in track_updates.items(): @@ -1224,8 +1376,10 @@ def parse_query_parts(parts, model_cls): else: non_path_parts.append(s) + case_insensitive = beets.config['sort_case_insensitive'].get(bool) + query, sort = 
dbcore.parse_sorted_query( - model_cls, non_path_parts, prefixes + model_cls, non_path_parts, prefixes, case_insensitive ) # Add path queries to aggregate query. @@ -1243,10 +1397,10 @@ def parse_query_string(s, model_cls): The string is split into components using shell-like syntax. """ - message = u"Query is not unicode: {0!r}".format(s) - assert isinstance(s, six.text_type), message + message = f"Query is not unicode: {s!r}" + assert isinstance(s, str), message try: - parts = util.shlex_split(s) + parts = shlex.split(s) except ValueError as exc: raise dbcore.InvalidQueryError(s, exc) return parse_query_parts(parts, model_cls) @@ -1259,10 +1413,7 @@ def _sqlite_bytelower(bytestring): ``-DSQLITE_LIKE_DOESNT_MATCH_BLOBS``. See ``https://github.com/beetbox/beets/issues/2172`` for details. """ - if not six.PY2: - return bytestring.lower() - - return buffer(bytes(bytestring).lower()) # noqa: F821 + return bytestring.lower() # The Library: interface to the database. @@ -1278,7 +1429,7 @@ class Library(dbcore.Database): '$artist/$album/$track $title'),), replacements=None): timeout = beets.config['timeout'].as_number() - super(Library, self).__init__(path, timeout=timeout) + super().__init__(path, timeout=timeout) self.directory = bytestring_path(normpath(directory)) self.path_formats = path_formats @@ -1287,7 +1438,7 @@ class Library(dbcore.Database): self._memotable = {} # Used for template substitution performance. def _create_connection(self): - conn = super(Library, self)._create_connection() + conn = super()._create_connection() conn.create_function('bytelower', 1, _sqlite_bytelower) return conn @@ -1309,10 +1460,10 @@ class Library(dbcore.Database): be empty. """ if not items: - raise ValueError(u'need at least one item') + raise ValueError('need at least one item') # Create the album structure using metadata from the first item. 
- values = dict((key, items[0][key]) for key in Album.item_keys) + values = {key: items[0][key] for key in Album.item_keys} album = Album(self, **values) # Add the album structure and set the items' album_id fields. @@ -1337,7 +1488,7 @@ class Library(dbcore.Database): # Parse the query, if necessary. try: parsed_sort = None - if isinstance(query, six.string_types): + if isinstance(query, str): query, parsed_sort = parse_query_string(query, model_cls) elif isinstance(query, (list, tuple)): query, parsed_sort = parse_query_parts(query, model_cls) @@ -1349,7 +1500,7 @@ class Library(dbcore.Database): if parsed_sort and not isinstance(parsed_sort, dbcore.query.NullSort): sort = parsed_sort - return super(Library, self)._fetch( + return super()._fetch( model_cls, query, sort ) @@ -1408,7 +1559,7 @@ def _int_arg(s): return int(s.strip()) -class DefaultTemplateFunctions(object): +class DefaultTemplateFunctions: """A container class for the default functions provided to path templates. These functions are contained in an object to provide additional context to the functions -- specifically, the Item being @@ -1447,7 +1598,7 @@ class DefaultTemplateFunctions(object): @staticmethod def tmpl_title(s): """Convert a string to title case.""" - return s.title() + return string.capwords(s) @staticmethod def tmpl_left(s, chars): @@ -1460,7 +1611,7 @@ class DefaultTemplateFunctions(object): return s[-_int_arg(chars):] @staticmethod - def tmpl_if(condition, trueval, falseval=u''): + def tmpl_if(condition, trueval, falseval=''): """If ``condition`` is nonempty and nonzero, emit ``trueval``; otherwise, emit ``falseval`` (if provided). """ @@ -1502,18 +1653,25 @@ class DefaultTemplateFunctions(object): """ # Fast paths: no album, no item or library, or memoized value. 
if not self.item or not self.lib: - return u'' - if self.item.album_id is None: - return u'' - memokey = ('aunique', keys, disam, self.item.album_id) + return '' + + if isinstance(self.item, Item): + album_id = self.item.album_id + elif isinstance(self.item, Album): + album_id = self.item.id + + if album_id is None: + return '' + + memokey = ('aunique', keys, disam, album_id) memoval = self.lib._memotable.get(memokey) if memoval is not None: return memoval - keys = keys or 'albumartist album' - disam = disam or 'albumtype year label catalognum albumdisambig' + keys = keys or beets.config['aunique']['keys'].as_str() + disam = disam or beets.config['aunique']['disambiguators'].as_str() if bracket is None: - bracket = '[]' + bracket = beets.config['aunique']['bracket'].as_str() keys = keys.split() disam = disam.split() @@ -1522,32 +1680,34 @@ class DefaultTemplateFunctions(object): bracket_l = bracket[0] bracket_r = bracket[1] else: - bracket_l = u'' - bracket_r = u'' + bracket_l = '' + bracket_r = '' - album = self.lib.get_album(self.item) + album = self.lib.get_album(album_id) if not album: # Do nothing for singletons. - self.lib._memotable[memokey] = u'' - return u'' + self.lib._memotable[memokey] = '' + return '' # Find matching albums to disambiguate with. subqueries = [] for key in keys: value = album.get(key, '') - subqueries.append(dbcore.MatchQuery(key, value)) + # Use slow queries for flexible attributes. + fast = key in album.item_keys + subqueries.append(dbcore.MatchQuery(key, value, fast)) albums = self.lib.albums(dbcore.AndQuery(subqueries)) # If there's only one album to matching these details, then do # nothing. if len(albums) == 1: - self.lib._memotable[memokey] = u'' - return u'' + self.lib._memotable[memokey] = '' + return '' # Find the first disambiguator that distinguishes the albums. for disambiguator in disam: # Get the value for each album for the current field. 
- disam_values = set([a.get(disambiguator, '') for a in albums]) + disam_values = {a.get(disambiguator, '') for a in albums} # If the set of unique values is equal to the number of # albums in the disambiguation set, we're done -- this is @@ -1557,24 +1717,24 @@ class DefaultTemplateFunctions(object): else: # No disambiguator distinguished all fields. - res = u' {1}{0}{2}'.format(album.id, bracket_l, bracket_r) + res = f' {bracket_l}{album.id}{bracket_r}' self.lib._memotable[memokey] = res return res # Flatten disambiguation value into a string. - disam_value = album.formatted(True).get(disambiguator) + disam_value = album.formatted(for_path=True).get(disambiguator) # Return empty string if disambiguator is empty. if disam_value: - res = u' {1}{0}{2}'.format(disam_value, bracket_l, bracket_r) + res = f' {bracket_l}{disam_value}{bracket_r}' else: - res = u'' + res = '' self.lib._memotable[memokey] = res return res @staticmethod - def tmpl_first(s, count=1, skip=0, sep=u'; ', join_str=u'; '): + def tmpl_first(s, count=1, skip=0, sep='; ', join_str='; '): """ Gets the item(s) from x to y in a string separated by something and join then with something @@ -1588,7 +1748,7 @@ class DefaultTemplateFunctions(object): count = skip + int(count) return join_str.join(s.split(sep)[skip:count]) - def tmpl_ifdef(self, field, trueval=u'', falseval=u''): + def tmpl_ifdef(self, field, trueval='', falseval=''): """ If field exists return trueval or the field (default) otherwise, emit return falseval (if provided). 
@@ -1597,7 +1757,7 @@ class DefaultTemplateFunctions(object): :param falseval: The string if the condition is false :return: The string, based on condition """ - if self.item.formatted().get(field): + if field in self.item: return trueval if trueval else self.item.formatted().get(field) else: return falseval diff --git a/libs/common/beets/logging.py b/libs/common/beets/logging.py index d5ec7b73..4f004f8d 100644 --- a/libs/common/beets/logging.py +++ b/libs/common/beets/logging.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -21,13 +20,11 @@ that when getLogger(name) instantiates a logger that logger uses {}-style formatting. """ -from __future__ import division, absolute_import, print_function from copy import copy from logging import * # noqa import subprocess import threading -import six def logsafe(val): @@ -43,7 +40,7 @@ def logsafe(val): example. """ # Already Unicode. - if isinstance(val, six.text_type): + if isinstance(val, str): return val # Bytestring: needs decoding. @@ -57,7 +54,7 @@ def logsafe(val): # A "problem" object: needs a workaround. elif isinstance(val, subprocess.CalledProcessError): try: - return six.text_type(val) + return str(val) except UnicodeDecodeError: # An object with a broken __unicode__ formatter. Use __str__ # instead. @@ -74,7 +71,7 @@ class StrFormatLogger(Logger): instead of %-style formatting. 
""" - class _LogMessage(object): + class _LogMessage: def __init__(self, msg, args, kwargs): self.msg = msg self.args = args @@ -82,22 +79,23 @@ class StrFormatLogger(Logger): def __str__(self): args = [logsafe(a) for a in self.args] - kwargs = dict((k, logsafe(v)) for (k, v) in self.kwargs.items()) + kwargs = {k: logsafe(v) for (k, v) in self.kwargs.items()} return self.msg.format(*args, **kwargs) def _log(self, level, msg, args, exc_info=None, extra=None, **kwargs): """Log msg.format(*args, **kwargs)""" m = self._LogMessage(msg, args, kwargs) - return super(StrFormatLogger, self)._log(level, m, (), exc_info, extra) + return super()._log(level, m, (), exc_info, extra) class ThreadLocalLevelLogger(Logger): """A version of `Logger` whose level is thread-local instead of shared. """ + def __init__(self, name, level=NOTSET): self._thread_level = threading.local() self.default_level = NOTSET - super(ThreadLocalLevelLogger, self).__init__(name, level) + super().__init__(name, level) @property def level(self): diff --git a/libs/common/beets/mediafile.py b/libs/common/beets/mediafile.py index 32a32fe1..82bcc973 100644 --- a/libs/common/beets/mediafile.py +++ b/libs/common/beets/mediafile.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,2096 +12,15 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""Handles low-level interfacing for files' tags. Wraps Mutagen to -automatically detect file types and provide a unified interface for a -useful subset of music files' tags. -Usage: +import mediafile - >>> f = MediaFile('Lucy.mp3') - >>> f.title - u'Lucy in the Sky with Diamonds' - >>> f.artist = 'The Beatles' - >>> f.save() +import warnings +warnings.warn("beets.mediafile is deprecated; use mediafile instead") -A field will always return a reasonable value of the correct type, even -if no tag is present. 
If no value is available, the value will be false -(e.g., zero or the empty string). +# Import everything from the mediafile module into this module. +for key, value in mediafile.__dict__.items(): + if key not in ['__name__']: + globals()[key] = value -Internally ``MediaFile`` uses ``MediaField`` descriptors to access the -data from the tags. In turn ``MediaField`` uses a number of -``StorageStyle`` strategies to handle format specific logic. -""" -from __future__ import division, absolute_import, print_function - -import mutagen -import mutagen.id3 -import mutagen.mp4 -import mutagen.flac -import mutagen.asf - -import codecs -import datetime -import re -import base64 -import binascii -import math -import struct -import imghdr -import os -import traceback -import enum -import logging -import six - - -__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] - -log = logging.getLogger(__name__) - -# Human-readable type names. -TYPES = { - 'mp3': 'MP3', - 'aac': 'AAC', - 'alac': 'ALAC', - 'ogg': 'OGG', - 'opus': 'Opus', - 'flac': 'FLAC', - 'ape': 'APE', - 'wv': 'WavPack', - 'mpc': 'Musepack', - 'asf': 'Windows Media', - 'aiff': 'AIFF', - 'dsf': 'DSD Stream File', -} - -PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'} - - -# Exceptions. - -class UnreadableFileError(Exception): - """Mutagen is not able to extract information from the file. - """ - def __init__(self, path, msg): - Exception.__init__(self, msg if msg else repr(path)) - - -class FileTypeError(UnreadableFileError): - """Reading this type of file is not supported. - - If passed the `mutagen_type` argument this indicates that the - mutagen type is not supported by `Mediafile`. 
- """ - def __init__(self, path, mutagen_type=None): - if mutagen_type is None: - msg = u'{0!r}: not in a recognized format'.format(path) - else: - msg = u'{0}: of mutagen type {1}'.format(repr(path), mutagen_type) - Exception.__init__(self, msg) - - -class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug. - """ - def __init__(self, path, mutagen_exc): - msg = u'{0}: {1}'.format(repr(path), mutagen_exc) - Exception.__init__(self, msg) - - -# Interacting with Mutagen. - -def mutagen_call(action, path, func, *args, **kwargs): - """Call a Mutagen function with appropriate error handling. - - `action` is a string describing what the function is trying to do, - and `path` is the relevant filename. The rest of the arguments - describe the callable to invoke. - - We require at least Mutagen 1.33, where `IOError` is *never* used, - neither for internal parsing errors *nor* for ordinary IO error - conditions such as a bad filename. Mutagen-specific parsing errors and IO - errors are reraised as `UnreadableFileError`. Other exceptions - raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. - """ - try: - return func(*args, **kwargs) - except mutagen.MutagenError as exc: - log.debug(u'%s failed: %s', action, six.text_type(exc)) - raise UnreadableFileError(path, six.text_type(exc)) - except Exception as exc: - # Isolate bugs in Mutagen. - log.debug(u'%s', traceback.format_exc()) - log.error(u'uncaught Mutagen exception in %s: %s', action, exc) - raise MutagenError(path, exc) - - -# Utility. - -def _safe_cast(out_type, val): - """Try to covert val to out_type but never raise an exception. If - the value can't be converted, then a sensible default value is - returned. out_type should be bool, int, or unicode; otherwise, the - value is just passed through. - """ - if val is None: - return None - - if out_type == int: - if isinstance(val, int) or isinstance(val, float): - # Just a number. 
- return int(val) - else: - # Process any other type as a string. - if isinstance(val, bytes): - val = val.decode('utf-8', 'ignore') - elif not isinstance(val, six.string_types): - val = six.text_type(val) - # Get a number from the front of the string. - match = re.match(r'[\+-]?[0-9]+', val.strip()) - return int(match.group(0)) if match else 0 - - elif out_type == bool: - try: - # Should work for strings, bools, ints: - return bool(int(val)) - except ValueError: - return False - - elif out_type == six.text_type: - if isinstance(val, bytes): - return val.decode('utf-8', 'ignore') - elif isinstance(val, six.text_type): - return val - else: - return six.text_type(val) - - elif out_type == float: - if isinstance(val, int) or isinstance(val, float): - return float(val) - else: - if isinstance(val, bytes): - val = val.decode('utf-8', 'ignore') - else: - val = six.text_type(val) - match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', - val.strip()) - if match: - val = match.group(0) - if val: - return float(val) - return 0.0 - - else: - return val - - -# Image coding for ASF/WMA. - -def _unpack_asf_image(data): - """Unpack image data from a WM/Picture tag. Return a tuple - containing the MIME type, the raw image data, a type indicator, and - the image's description. - - This function is treated as "untrusted" and could throw all manner - of exceptions (out-of-bounds, etc.). We should clean this up - sometime so that the failure modes are well-defined. - """ - type, size = struct.unpack_from(' 0: - gain = math.log10(maxgain / 1000.0) * -10 - else: - # Invalid gain value found. - gain = 0.0 - - # SoundCheck stores peak values as the actual value of the sample, - # and again separately for the left and right channels. We need to - # convert this to a percentage of full scale, which is 32768 for a - # 16 bit sample. Once again, we play it safe by using the larger of - # the two values. 
- peak = max(soundcheck[6:8]) / 32768.0 - - return round(gain, 2), round(peak, 6) - - -def _sc_encode(gain, peak): - """Encode ReplayGain gain/peak values as a Sound Check string. - """ - # SoundCheck stores the peak value as the actual value of the - # sample, rather than the percentage of full scale that RG uses, so - # we do a simple conversion assuming 16 bit samples. - peak *= 32768.0 - - # SoundCheck stores absolute RMS values in some unknown units rather - # than the dB values RG uses. We can calculate these absolute values - # from the gain ratio using a reference value of 1000 units. We also - # enforce the maximum value here, which is equivalent to about - # -18.2dB. - g1 = int(min(round((10 ** (gain / -10)) * 1000), 65534)) - # Same as above, except our reference level is 2500 units. - g2 = int(min(round((10 ** (gain / -10)) * 2500), 65534)) - - # The purpose of these values are unknown, but they also seem to be - # unused so we just use zero. - uk = 0 - values = (g1, g1, g2, g2, uk, uk, int(peak), int(peak), uk, uk) - return (u' %08X' * 10) % values - - -# Cover art and other images. -def _imghdr_what_wrapper(data): - """A wrapper around imghdr.what to account for jpeg files that can only be - identified as such using their magic bytes - See #1545 - See https://github.com/file/file/blob/master/magic/Magdir/jpeg#L12 - """ - # imghdr.what returns none for jpegs with only the magic bytes, so - # _wider_test_jpeg is run in that case. It still returns None if it didn't - # match such a jpeg file. - return imghdr.what(None, h=data) or _wider_test_jpeg(data) - - -def _wider_test_jpeg(data): - """Test for a jpeg file following the UNIX file implementation which - uses the magic bytes rather than just looking for the bytes that - represent 'JFIF' or 'EXIF' at a fixed position. - """ - if data[:2] == b'\xff\xd8': - return 'jpeg' - - -def image_mime_type(data): - """Return the MIME type of the image data (a bytestring). 
- """ - # This checks for a jpeg file with only the magic bytes (unrecognized by - # imghdr.what). imghdr.what returns none for that type of file, so - # _wider_test_jpeg is run in that case. It still returns None if it didn't - # match such a jpeg file. - kind = _imghdr_what_wrapper(data) - if kind in ['gif', 'jpeg', 'png', 'tiff', 'bmp']: - return 'image/{0}'.format(kind) - elif kind == 'pgm': - return 'image/x-portable-graymap' - elif kind == 'pbm': - return 'image/x-portable-bitmap' - elif kind == 'ppm': - return 'image/x-portable-pixmap' - elif kind == 'xbm': - return 'image/x-xbitmap' - else: - return 'image/x-{0}'.format(kind) - - -def image_extension(data): - ext = _imghdr_what_wrapper(data) - return PREFERRED_IMAGE_EXTENSIONS.get(ext, ext) - - -class ImageType(enum.Enum): - """Indicates the kind of an `Image` stored in a file's tag. - """ - other = 0 - icon = 1 - other_icon = 2 - front = 3 - back = 4 - leaflet = 5 - media = 6 - lead_artist = 7 - artist = 8 - conductor = 9 - group = 10 - composer = 11 - lyricist = 12 - recording_location = 13 - recording_session = 14 - performance = 15 - screen_capture = 16 - fish = 17 - illustration = 18 - artist_logo = 19 - publisher_logo = 20 - - -class Image(object): - """Structure representing image data and metadata that can be - stored and retrieved from tags. - - The structure has four properties. 
- * ``data`` The binary data of the image - * ``desc`` An optional description of the image - * ``type`` An instance of `ImageType` indicating the kind of image - * ``mime_type`` Read-only property that contains the mime type of - the binary data - """ - def __init__(self, data, desc=None, type=None): - assert isinstance(data, bytes) - if desc is not None: - assert isinstance(desc, six.text_type) - self.data = data - self.desc = desc - if isinstance(type, int): - try: - type = list(ImageType)[type] - except IndexError: - log.debug(u"ignoring unknown image type index %s", type) - type = ImageType.other - self.type = type - - @property - def mime_type(self): - if self.data: - return image_mime_type(self.data) - - @property - def type_index(self): - if self.type is None: - # This method is used when a tag format requires the type - # index to be set, so we return "other" as the default value. - return 0 - return self.type.value - - -# StorageStyle classes describe strategies for accessing values in -# Mutagen file objects. - -class StorageStyle(object): - """A strategy for storing a value for a certain tag format (or set - of tag formats). This basic StorageStyle describes simple 1:1 - mapping from raw values to keys in a Mutagen file object; subclasses - describe more sophisticated translations or format-specific access - strategies. - - MediaFile uses a StorageStyle via three methods: ``get()``, - ``set()``, and ``delete()``. It passes a Mutagen file object to - each. - - Internally, the StorageStyle implements ``get()`` and ``set()`` - using two steps that may be overridden by subtypes. To get a value, - the StorageStyle first calls ``fetch()`` to retrieve the value - corresponding to a key and then ``deserialize()`` to convert the raw - Mutagen value to a consumable Python value. Similarly, to set a - field, we call ``serialize()`` to encode the value and then - ``store()`` to assign the result into the Mutagen object. 
- - Each StorageStyle type has a class-level `formats` attribute that is - a list of strings indicating the formats that the style applies to. - MediaFile only uses StorageStyles that apply to the correct type for - a given audio file. - """ - - formats = ['FLAC', 'OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac', 'APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio'] - """List of mutagen classes the StorageStyle can handle. - """ - - def __init__(self, key, as_type=six.text_type, suffix=None, - float_places=2): - """Create a basic storage strategy. Parameters: - - - `key`: The key on the Mutagen file object used to access the - field's data. - - `as_type`: The Python type that the value is stored as - internally (`unicode`, `int`, `bool`, or `bytes`). - - `suffix`: When `as_type` is a string type, append this before - storing the value. - - `float_places`: When the value is a floating-point number and - encoded as a string, the number of digits to store after the - decimal point. - """ - self.key = key - self.as_type = as_type - self.suffix = suffix - self.float_places = float_places - - # Convert suffix to correct string type. - if self.suffix and self.as_type is six.text_type \ - and not isinstance(self.suffix, six.text_type): - self.suffix = self.suffix.decode('utf-8') - - # Getter. - - def get(self, mutagen_file): - """Get the value for the field using this style. - """ - return self.deserialize(self.fetch(mutagen_file)) - - def fetch(self, mutagen_file): - """Retrieve the raw value of for this tag from the Mutagen file - object. - """ - try: - return mutagen_file[self.key][0] - except (KeyError, IndexError): - return None - - def deserialize(self, mutagen_value): - """Given a raw value stored on a Mutagen object, decode and - return the represented value. 
- """ - if self.suffix and isinstance(mutagen_value, six.text_type) \ - and mutagen_value.endswith(self.suffix): - return mutagen_value[:-len(self.suffix)] - else: - return mutagen_value - - # Setter. - - def set(self, mutagen_file, value): - """Assign the value for the field using this style. - """ - self.store(mutagen_file, self.serialize(value)) - - def store(self, mutagen_file, value): - """Store a serialized value in the Mutagen file object. - """ - mutagen_file[self.key] = [value] - - def serialize(self, value): - """Convert the external Python value to a type that is suitable for - storing in a Mutagen file object. - """ - if isinstance(value, float) and self.as_type is six.text_type: - value = u'{0:.{1}f}'.format(value, self.float_places) - value = self.as_type(value) - elif self.as_type is six.text_type: - if isinstance(value, bool): - # Store bools as 1/0 instead of True/False. - value = six.text_type(int(bool(value))) - elif isinstance(value, bytes): - value = value.decode('utf-8', 'ignore') - else: - value = six.text_type(value) - else: - value = self.as_type(value) - - if self.suffix: - value += self.suffix - - return value - - def delete(self, mutagen_file): - """Remove the tag from the file. - """ - if self.key in mutagen_file: - del mutagen_file[self.key] - - -class ListStorageStyle(StorageStyle): - """Abstract storage style that provides access to lists. - - The ListMediaField descriptor uses a ListStorageStyle via two - methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file - object to each. - - Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must - return a (possibly empty) list and ``store`` receives a serialized - list of values as the second argument. - - The `serialize` and `deserialize` methods (from the base - `StorageStyle`) are still called with individual values. This class - handles packing and unpacking the values into lists. 
- """ - def get(self, mutagen_file): - """Get the first value in the field's value list. - """ - try: - return self.get_list(mutagen_file)[0] - except IndexError: - return None - - def get_list(self, mutagen_file): - """Get a list of all values for the field using this style. - """ - return [self.deserialize(item) for item in self.fetch(mutagen_file)] - - def fetch(self, mutagen_file): - """Get the list of raw (serialized) values. - """ - try: - return mutagen_file[self.key] - except KeyError: - return [] - - def set(self, mutagen_file, value): - """Set an individual value as the only value for the field using - this style. - """ - self.set_list(mutagen_file, [value]) - - def set_list(self, mutagen_file, values): - """Set all values for the field using this style. `values` - should be an iterable. - """ - self.store(mutagen_file, [self.serialize(value) for value in values]) - - def store(self, mutagen_file, values): - """Set the list of all raw (serialized) values for this field. - """ - mutagen_file[self.key] = values - - -class SoundCheckStorageStyleMixin(object): - """A mixin for storage styles that read and write iTunes SoundCheck - analysis values. The object must have an `index` field that - indicates which half of the gain/peak pair---0 or 1---the field - represents. - """ - def get(self, mutagen_file): - data = self.fetch(mutagen_file) - if data is not None: - return _sc_decode(data)[self.index] - - def set(self, mutagen_file, value): - data = self.fetch(mutagen_file) - if data is None: - gain_peak = [0, 0] - else: - gain_peak = list(_sc_decode(data)) - gain_peak[self.index] = value or 0 - data = self.serialize(_sc_encode(*gain_peak)) - self.store(mutagen_file, data) - - -class ASFStorageStyle(ListStorageStyle): - """A general storage style for Windows Media/ASF files. 
- """ - formats = ['ASF'] - - def deserialize(self, data): - if isinstance(data, mutagen.asf.ASFBaseAttribute): - data = data.value - return data - - -class MP4StorageStyle(StorageStyle): - """A general storage style for MPEG-4 tags. - """ - formats = ['MP4'] - - def serialize(self, value): - value = super(MP4StorageStyle, self).serialize(value) - if self.key.startswith('----:') and isinstance(value, six.text_type): - value = value.encode('utf-8') - return value - - -class MP4TupleStorageStyle(MP4StorageStyle): - """A style for storing values as part of a pair of numbers in an - MPEG-4 file. - """ - def __init__(self, key, index=0, **kwargs): - super(MP4TupleStorageStyle, self).__init__(key, **kwargs) - self.index = index - - def deserialize(self, mutagen_value): - items = mutagen_value or [] - packing_length = 2 - return list(items) + [0] * (packing_length - len(items)) - - def get(self, mutagen_file): - value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] - if value == 0: - # The values are always present and saved as integers. So we - # assume that "0" indicates it is not set. - return None - else: - return value - - def set(self, mutagen_file, value): - if value is None: - value = 0 - items = self.deserialize(self.fetch(mutagen_file)) - items[self.index] = int(value) - self.store(mutagen_file, items) - - def delete(self, mutagen_file): - if self.index == 0: - super(MP4TupleStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): - pass - - -class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): - def __init__(self, key, index=0, **kwargs): - super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) - self.index = index - - -class MP4BoolStorageStyle(MP4StorageStyle): - """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type - specifically for representing booleans.) 
- """ - def get(self, mutagen_file): - try: - return mutagen_file[self.key] - except KeyError: - return None - - def get_list(self, mutagen_file): - raise NotImplementedError(u'MP4 bool storage does not support lists') - - def set(self, mutagen_file, value): - mutagen_file[self.key] = value - - def set_list(self, mutagen_file, values): - raise NotImplementedError(u'MP4 bool storage does not support lists') - - -class MP4ImageStorageStyle(MP4ListStorageStyle): - """Store images as MPEG-4 image atoms. Values are `Image` objects. - """ - def __init__(self, **kwargs): - super(MP4ImageStorageStyle, self).__init__(key='covr', **kwargs) - - def deserialize(self, data): - return Image(data) - - def serialize(self, image): - if image.mime_type == 'image/png': - kind = mutagen.mp4.MP4Cover.FORMAT_PNG - elif image.mime_type == 'image/jpeg': - kind = mutagen.mp4.MP4Cover.FORMAT_JPEG - else: - raise ValueError(u'MP4 files only supports PNG and JPEG images') - return mutagen.mp4.MP4Cover(image.data, kind) - - -class MP3StorageStyle(StorageStyle): - """Store data in ID3 frames. - """ - formats = ['MP3', 'AIFF', 'DSF'] - - def __init__(self, key, id3_lang=None, **kwargs): - """Create a new ID3 storage style. `id3_lang` is the value for - the language field of newly created frames. - """ - self.id3_lang = id3_lang - super(MP3StorageStyle, self).__init__(key, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text[0] - except (KeyError, IndexError): - return None - - def store(self, mutagen_file, value): - frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3PeopleStorageStyle(MP3StorageStyle): - """Store list of people in ID3 frames. 
- """ - def __init__(self, key, involvement='', **kwargs): - self.involvement = involvement - super(MP3PeopleStorageStyle, self).__init__(key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - - # Try modifying in place. - found = False - for frame in frames: - if frame.encoding == mutagen.id3.Encoding.UTF8: - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - pair[1] = value - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - encoding=mutagen.id3.Encoding.UTF8, - people=[[self.involvement, value]] - ) - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - try: - return pair[1] - except IndexError: - return None - - -class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): - """Store lists of data in multiple ID3 frames. - """ - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text - except KeyError: - return [] - - def store(self, mutagen_file, values): - frame = mutagen.id3.Frames[self.key](encoding=3, text=values) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3UFIDStorageStyle(MP3StorageStyle): - """Store string data in a UFID ID3 frame with a particular owner. - """ - def __init__(self, owner, **kwargs): - self.owner = owner - super(MP3UFIDStorageStyle, self).__init__('UFID:' + owner, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].data - except KeyError: - return None - - def store(self, mutagen_file, value): - # This field type stores text data as encoded data. - assert isinstance(value, six.text_type) - value = value.encode('utf-8') - - frames = mutagen_file.tags.getall(self.key) - for frame in frames: - # Replace existing frame data. - if frame.owner == self.owner: - frame.data = value - else: - # New frame. 
- frame = mutagen.id3.UFID(owner=self.owner, data=value) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3DescStorageStyle(MP3StorageStyle): - """Store data in a TXXX (or similar) ID3 frame. The frame is - selected based its ``desc`` field. - """ - def __init__(self, desc=u'', key='TXXX', **kwargs): - assert isinstance(desc, six.text_type) - self.description = desc - super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - if self.key != 'USLT': - value = [value] - - # Try modifying in place. - found = False - for frame in frames: - if frame.desc.lower() == self.description.lower(): - frame.text = value - frame.encoding = mutagen.id3.Encoding.UTF8 - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - desc=self.description, - text=value, - encoding=mutagen.id3.Encoding.UTF8, - ) - if self.id3_lang: - frame.lang = self.id3_lang - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - if self.key == 'USLT': - return frame.text - try: - return frame.text[0] - except IndexError: - return None - - def delete(self, mutagen_file): - found_frame = None - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - found_frame = frame - break - if found_frame is not None: - del mutagen_file[frame.HashKey] - - -class MP3SlashPackStorageStyle(MP3StorageStyle): - """Store value as part of pair that is serialized as a slash- - separated string. 
- """ - def __init__(self, key, pack_pos=0, **kwargs): - super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) - self.pack_pos = pack_pos - - def _fetch_unpacked(self, mutagen_file): - data = self.fetch(mutagen_file) - if data: - items = six.text_type(data).split('/') - else: - items = [] - packing_length = 2 - return list(items) + [None] * (packing_length - len(items)) - - def get(self, mutagen_file): - return self._fetch_unpacked(mutagen_file)[self.pack_pos] - - def set(self, mutagen_file, value): - items = self._fetch_unpacked(mutagen_file) - items[self.pack_pos] = value - if items[0] is None: - items[0] = '' - if items[1] is None: - items.pop() # Do not store last value - self.store(mutagen_file, '/'.join(map(six.text_type, items))) - - def delete(self, mutagen_file): - if self.pack_pos == 0: - super(MP3SlashPackStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): - """Converts between APIC frames and ``Image`` instances. - - The `get_list` method inherited from ``ListStorageStyle`` returns a - list of ``Image``s. Similarly, the `set_list` method accepts a - list of ``Image``s as its ``values`` argument. - """ - def __init__(self): - super(MP3ImageStorageStyle, self).__init__(key='APIC') - self.as_type = bytes - - def deserialize(self, apic_frame): - """Convert APIC frame into Image.""" - return Image(data=apic_frame.data, desc=apic_frame.desc, - type=apic_frame.type) - - def fetch(self, mutagen_file): - return mutagen_file.tags.getall(self.key) - - def store(self, mutagen_file, frames): - mutagen_file.tags.setall(self.key, frames) - - def delete(self, mutagen_file): - mutagen_file.tags.delall(self.key) - - def serialize(self, image): - """Return an APIC frame populated with data from ``image``. 
- """ - assert isinstance(image, Image) - frame = mutagen.id3.Frames[self.key]() - frame.data = image.data - frame.mime = image.mime_type - frame.desc = image.desc or u'' - - # For compatibility with OS X/iTunes prefer latin-1 if possible. - # See issue #899 - try: - frame.desc.encode("latin-1") - except UnicodeEncodeError: - frame.encoding = mutagen.id3.Encoding.UTF16 - else: - frame.encoding = mutagen.id3.Encoding.LATIN1 - - frame.type = image.type_index - return frame - - -class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, - MP3DescStorageStyle): - def __init__(self, index=0, **kwargs): - super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) - self.index = index - - -class ASFImageStorageStyle(ListStorageStyle): - """Store images packed into Windows Media/ASF byte array attributes. - Values are `Image` objects. - """ - formats = ['ASF'] - - def __init__(self): - super(ASFImageStorageStyle, self).__init__(key='WM/Picture') - - def deserialize(self, asf_picture): - mime, data, type, desc = _unpack_asf_image(asf_picture.value) - return Image(data, desc=desc, type=type) - - def serialize(self, image): - pic = mutagen.asf.ASFByteArrayAttribute() - pic.value = _pack_asf_image(image.mime_type, image.data, - type=image.type_index, - description=image.desc or u'') - return pic - - -class VorbisImageStorageStyle(ListStorageStyle): - """Store images in Vorbis comments. Both legacy COVERART fields and - modern METADATA_BLOCK_PICTURE tags are supported. Data is - base64-encoded. Values are `Image` objects. - """ - formats = ['OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac'] - - def __init__(self): - super(VorbisImageStorageStyle, self).__init__( - key='metadata_block_picture' - ) - self.as_type = bytes - - def fetch(self, mutagen_file): - images = [] - if 'metadata_block_picture' not in mutagen_file: - # Try legacy COVERART tags. 
- if 'coverart' in mutagen_file: - for data in mutagen_file['coverart']: - images.append(Image(base64.b64decode(data))) - return images - for data in mutagen_file["metadata_block_picture"]: - try: - pic = mutagen.flac.Picture(base64.b64decode(data)) - except (TypeError, AttributeError): - continue - images.append(Image(data=pic.data, desc=pic.desc, - type=pic.type)) - return images - - def store(self, mutagen_file, image_data): - # Strip all art, including legacy COVERART. - if 'coverart' in mutagen_file: - del mutagen_file['coverart'] - if 'coverartmime' in mutagen_file: - del mutagen_file['coverartmime'] - super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) - - def serialize(self, image): - """Turn a Image into a base64 encoded FLAC picture block. - """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - - # Encoding with base64 returns bytes on both Python 2 and 3. - # Mutagen requires the data to be a Unicode string, so we decode - # it before passing it along. - return base64.b64encode(pic.write()).decode('ascii') - - -class FlacImageStorageStyle(ListStorageStyle): - """Converts between ``mutagen.flac.Picture`` and ``Image`` instances. - """ - formats = ['FLAC'] - - def __init__(self): - super(FlacImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - return mutagen_file.pictures - - def deserialize(self, flac_picture): - return Image(data=flac_picture.data, desc=flac_picture.desc, - type=flac_picture.type) - - def store(self, mutagen_file, pictures): - """``pictures`` is a list of mutagen.flac.Picture instances. - """ - mutagen_file.clear_pictures() - for pic in pictures: - mutagen_file.add_picture(pic) - - def serialize(self, image): - """Turn a Image into a mutagen.flac.Picture. 
- """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - return pic - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - mutagen_file.clear_pictures() - - -class APEv2ImageStorageStyle(ListStorageStyle): - """Store images in APEv2 tags. Values are `Image` objects. - """ - formats = ['APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio', 'OptimFROG'] - - TAG_NAMES = { - ImageType.other: 'Cover Art (other)', - ImageType.icon: 'Cover Art (icon)', - ImageType.other_icon: 'Cover Art (other icon)', - ImageType.front: 'Cover Art (front)', - ImageType.back: 'Cover Art (back)', - ImageType.leaflet: 'Cover Art (leaflet)', - ImageType.media: 'Cover Art (media)', - ImageType.lead_artist: 'Cover Art (lead)', - ImageType.artist: 'Cover Art (artist)', - ImageType.conductor: 'Cover Art (conductor)', - ImageType.group: 'Cover Art (band)', - ImageType.composer: 'Cover Art (composer)', - ImageType.lyricist: 'Cover Art (lyricist)', - ImageType.recording_location: 'Cover Art (studio)', - ImageType.recording_session: 'Cover Art (recording)', - ImageType.performance: 'Cover Art (performance)', - ImageType.screen_capture: 'Cover Art (movie scene)', - ImageType.fish: 'Cover Art (colored fish)', - ImageType.illustration: 'Cover Art (illustration)', - ImageType.artist_logo: 'Cover Art (band logo)', - ImageType.publisher_logo: 'Cover Art (publisher logo)', - } - - def __init__(self): - super(APEv2ImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - images = [] - for cover_type, cover_tag in self.TAG_NAMES.items(): - try: - frame = mutagen_file[cover_tag] - text_delimiter_index = frame.value.find(b'\x00') - if text_delimiter_index > 0: - comment = frame.value[0:text_delimiter_index] - comment = comment.decode('utf-8', 'replace') - else: - comment = None - image_data = frame.value[text_delimiter_index + 1:] - images.append(Image(data=image_data, 
type=cover_type, - desc=comment)) - except KeyError: - pass - - return images - - def set_list(self, mutagen_file, values): - self.delete(mutagen_file) - - for image in values: - image_type = image.type or ImageType.other - comment = image.desc or '' - image_data = comment.encode('utf-8') + b'\x00' + image.data - cover_tag = self.TAG_NAMES[image_type] - mutagen_file[cover_tag] = image_data - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - for cover_tag in self.TAG_NAMES.values(): - try: - del mutagen_file[cover_tag] - except KeyError: - pass - - -# MediaField is a descriptor that represents a single logical field. It -# aggregates several StorageStyles describing how to access the data for -# each file type. - -class MediaField(object): - """A descriptor providing access to a particular (abstract) metadata - field. - """ - def __init__(self, *styles, **kwargs): - """Creates a new MediaField. - - :param styles: `StorageStyle` instances that describe the strategy - for reading and writing the field in particular - formats. There must be at least one style for - each possible file format. - - :param out_type: the type of the value that should be returned when - getting this property. - - """ - self.out_type = kwargs.get('out_type', six.text_type) - self._styles = styles - - def styles(self, mutagen_file): - """Yields the list of storage styles of this field that can - handle the MediaFile's format. 
- """ - for style in self._styles: - if mutagen_file.__class__.__name__ in style.formats: - yield style - - def __get__(self, mediafile, owner=None): - out = None - for style in self.styles(mediafile.mgfile): - out = style.get(mediafile.mgfile) - if out: - break - return _safe_cast(self.out_type, out) - - def __set__(self, mediafile, value): - if value is None: - value = self._none_value() - for style in self.styles(mediafile.mgfile): - style.set(mediafile.mgfile, value) - - def __delete__(self, mediafile): - for style in self.styles(mediafile.mgfile): - style.delete(mediafile.mgfile) - - def _none_value(self): - """Get an appropriate "null" value for this field's type. This - is used internally when setting the field to None. - """ - if self.out_type == int: - return 0 - elif self.out_type == float: - return 0.0 - elif self.out_type == bool: - return False - elif self.out_type == six.text_type: - return u'' - - -class ListMediaField(MediaField): - """Property descriptor that retrieves a list of multiple values from - a tag. - - Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` - strategies to do the actual work. - """ - def __get__(self, mediafile, _): - values = [] - for style in self.styles(mediafile.mgfile): - values.extend(style.get_list(mediafile.mgfile)) - return [_safe_cast(self.out_type, value) for value in values] - - def __set__(self, mediafile, values): - for style in self.styles(mediafile.mgfile): - style.set_list(mediafile.mgfile, values) - - def single_field(self): - """Returns a ``MediaField`` descriptor that gets and sets the - first item. - """ - options = {'out_type': self.out_type} - return MediaField(*self._styles, **options) - - -class DateField(MediaField): - """Descriptor that handles serializing and deserializing dates - - The getter parses value from tags into a ``datetime.date`` instance - and setter serializes such an instance into a string. 
- - For granular access to year, month, and day, use the ``*_field`` - methods to create corresponding `DateItemField`s. - """ - def __init__(self, *date_styles, **kwargs): - """``date_styles`` is a list of ``StorageStyle``s to store and - retrieve the whole date from. The ``year`` option is an - additional list of fallback styles for the year. The year is - always set on this style, but is only retrieved if the main - storage styles do not return a value. - """ - super(DateField, self).__init__(*date_styles) - year_style = kwargs.get('year', None) - if year_style: - self._year_field = MediaField(*year_style) - - def __get__(self, mediafile, owner=None): - year, month, day = self._get_date_tuple(mediafile) - if not year: - return None - try: - return datetime.date( - year, - month or 1, - day or 1 - ) - except ValueError: # Out of range values. - return None - - def __set__(self, mediafile, date): - if date is None: - self._set_date_tuple(mediafile, None, None, None) - else: - self._set_date_tuple(mediafile, date.year, date.month, date.day) - - def __delete__(self, mediafile): - super(DateField, self).__delete__(mediafile) - if hasattr(self, '_year_field'): - self._year_field.__delete__(mediafile) - - def _get_date_tuple(self, mediafile): - """Get a 3-item sequence representing the date consisting of a - year, month, and day number. Each number is either an integer or - None. - """ - # Get the underlying data and split on hyphens and slashes. - datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, six.string_types): - datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) - items = re.split('[-/]', six.text_type(datestring)) - else: - items = [] - - # Ensure that we have exactly 3 components, possibly by - # truncating or padding. - items = items[:3] - if len(items) < 3: - items += [None] * (3 - len(items)) - - # Use year field if year is missing. 
- if not items[0] and hasattr(self, '_year_field'): - items[0] = self._year_field.__get__(mediafile) - - # Convert each component to an integer if possible. - items_ = [] - for item in items: - try: - items_.append(int(item)) - except (TypeError, ValueError): - items_.append(None) - return items_ - - def _set_date_tuple(self, mediafile, year, month=None, day=None): - """Set the value of the field given a year, month, and day - number. Each number can be an integer or None to indicate an - unset component. - """ - if year is None: - self.__delete__(mediafile) - return - - date = [u'{0:04d}'.format(int(year))] - if month: - date.append(u'{0:02d}'.format(int(month))) - if month and day: - date.append(u'{0:02d}'.format(int(day))) - date = map(six.text_type, date) - super(DateField, self).__set__(mediafile, u'-'.join(date)) - - if hasattr(self, '_year_field'): - self._year_field.__set__(mediafile, year) - - def year_field(self): - return DateItemField(self, 0) - - def month_field(self): - return DateItemField(self, 1) - - def day_field(self): - return DateItemField(self, 2) - - -class DateItemField(MediaField): - """Descriptor that gets and sets constituent parts of a `DateField`: - the month, day, or year. - """ - def __init__(self, date_field, item_pos): - self.date_field = date_field - self.item_pos = item_pos - - def __get__(self, mediafile, _): - return self.date_field._get_date_tuple(mediafile)[self.item_pos] - - def __set__(self, mediafile, value): - items = self.date_field._get_date_tuple(mediafile) - items[self.item_pos] = value - self.date_field._set_date_tuple(mediafile, *items) - - def __delete__(self, mediafile): - self.__set__(mediafile, None) - - -class CoverArtField(MediaField): - """A descriptor that provides access to the *raw image data* for the - cover image on a file. This is used for backwards compatibility: the - full `ImageListField` provides richer `Image` objects. 
- - When there are multiple images we try to pick the most likely to be a front - cover. - """ - def __init__(self): - pass - - def __get__(self, mediafile, _): - candidates = mediafile.images - if candidates: - return self.guess_cover_image(candidates).data - else: - return None - - @staticmethod - def guess_cover_image(candidates): - if len(candidates) == 1: - return candidates[0] - try: - return next(c for c in candidates if c.type == ImageType.front) - except StopIteration: - return candidates[0] - - def __set__(self, mediafile, data): - if data: - mediafile.images = [Image(data=data)] - else: - mediafile.images = [] - - def __delete__(self, mediafile): - delattr(mediafile, 'images') - - -class ImageListField(ListMediaField): - """Descriptor to access the list of images embedded in tags. - - The getter returns a list of `Image` instances obtained from - the tags. The setter accepts a list of `Image` instances to be - written to the tags. - """ - def __init__(self): - # The storage styles used here must implement the - # `ListStorageStyle` interface and get and set lists of - # `Image`s. - super(ImageListField, self).__init__( - MP3ImageStorageStyle(), - MP4ImageStorageStyle(), - ASFImageStorageStyle(), - VorbisImageStorageStyle(), - FlacImageStorageStyle(), - APEv2ImageStorageStyle(), - out_type=Image, - ) - - -# MediaFile is a collection of fields. - -class MediaFile(object): - """Represents a multimedia file on disk and provides access to its - metadata. - """ - def __init__(self, path, id3v23=False): - """Constructs a new `MediaFile` reflecting the file at path. May - throw `UnreadableFileError`. - - By default, MP3 files are saved with ID3v2.4 tags. You can use - the older ID3v2.3 standard by specifying the `id3v23` option. 
- """ - self.path = path - - self.mgfile = mutagen_call('open', path, mutagen.File, path) - - if self.mgfile is None: - # Mutagen couldn't guess the type - raise FileTypeError(path) - elif (type(self.mgfile).__name__ == 'M4A' or - type(self.mgfile).__name__ == 'MP4'): - info = self.mgfile.info - if info.codec and info.codec.startswith('alac'): - self.type = 'alac' - else: - self.type = 'aac' - elif (type(self.mgfile).__name__ == 'ID3' or - type(self.mgfile).__name__ == 'MP3'): - self.type = 'mp3' - elif type(self.mgfile).__name__ == 'FLAC': - self.type = 'flac' - elif type(self.mgfile).__name__ == 'OggOpus': - self.type = 'opus' - elif type(self.mgfile).__name__ == 'OggVorbis': - self.type = 'ogg' - elif type(self.mgfile).__name__ == 'MonkeysAudio': - self.type = 'ape' - elif type(self.mgfile).__name__ == 'WavPack': - self.type = 'wv' - elif type(self.mgfile).__name__ == 'Musepack': - self.type = 'mpc' - elif type(self.mgfile).__name__ == 'ASF': - self.type = 'asf' - elif type(self.mgfile).__name__ == 'AIFF': - self.type = 'aiff' - elif type(self.mgfile).__name__ == 'DSF': - self.type = 'dsf' - else: - raise FileTypeError(path, type(self.mgfile).__name__) - - # Add a set of tags if it's missing. - if self.mgfile.tags is None: - self.mgfile.add_tags() - - # Set the ID3v2.3 flag only for MP3s. - self.id3v23 = id3v23 and self.type == 'mp3' - - def save(self): - """Write the object's tags back to the file. May - throw `UnreadableFileError`. - """ - # Possibly save the tags to ID3v2.3. - kwargs = {} - if self.id3v23: - id3 = self.mgfile - if hasattr(id3, 'tags'): - # In case this is an MP3 object, not an ID3 object. - id3 = id3.tags - id3.update_to_v23() - kwargs['v2_version'] = 3 - - mutagen_call('save', self.path, self.mgfile.save, **kwargs) - - def delete(self): - """Remove the current metadata tag from the file. May - throw `UnreadableFileError`. - """ - mutagen_call('delete', self.path, self.mgfile.delete) - - # Convenient access to the set of available fields. 
- - @classmethod - def fields(cls): - """Get the names of all writable properties that reflect - metadata tags (i.e., those that are instances of - :class:`MediaField`). - """ - for property, descriptor in cls.__dict__.items(): - if isinstance(descriptor, MediaField): - if isinstance(property, bytes): - # On Python 2, class field names are bytes. This method - # produces text strings. - yield property.decode('utf8', 'ignore') - else: - yield property - - @classmethod - def _field_sort_name(cls, name): - """Get a sort key for a field name that determines the order - fields should be written in. - - Fields names are kept unchanged, unless they are instances of - :class:`DateItemField`, in which case `year`, `month`, and `day` - are replaced by `date0`, `date1`, and `date2`, respectively, to - make them appear in that order. - """ - if isinstance(cls.__dict__[name], DateItemField): - name = re.sub('year', 'date0', name) - name = re.sub('month', 'date1', name) - name = re.sub('day', 'date2', name) - return name - - @classmethod - def sorted_fields(cls): - """Get the names of all writable metadata fields, sorted in the - order that they should be written. - - This is a lexicographic order, except for instances of - :class:`DateItemField`, which are sorted in year-month-day - order. - """ - for property in sorted(cls.fields(), key=cls._field_sort_name): - yield property - - @classmethod - def readable_fields(cls): - """Get all metadata fields: the writable ones from - :meth:`fields` and also other audio properties. - """ - for property in cls.fields(): - yield property - for property in ('length', 'samplerate', 'bitdepth', 'bitrate', - 'channels', 'format'): - yield property - - @classmethod - def add_field(cls, name, descriptor): - """Add a field to store custom tags. - - :param name: the name of the property the field is accessed - through. It must not already exist on this class. - - :param descriptor: an instance of :class:`MediaField`. 
- """ - if not isinstance(descriptor, MediaField): - raise ValueError( - u'{0} must be an instance of MediaField'.format(descriptor)) - if name in cls.__dict__: - raise ValueError( - u'property "{0}" already exists on MediaField'.format(name)) - setattr(cls, name, descriptor) - - def update(self, dict): - """Set all field values from a dictionary. - - For any key in `dict` that is also a field to store tags the - method retrieves the corresponding value from `dict` and updates - the `MediaFile`. If a key has the value `None`, the - corresponding property is deleted from the `MediaFile`. - """ - for field in self.sorted_fields(): - if field in dict: - if dict[field] is None: - delattr(self, field) - else: - setattr(self, field, dict[field]) - - # Field definitions. - - title = MediaField( - MP3StorageStyle('TIT2'), - MP4StorageStyle('\xa9nam'), - StorageStyle('TITLE'), - ASFStorageStyle('Title'), - ) - artist = MediaField( - MP3StorageStyle('TPE1'), - MP4StorageStyle('\xa9ART'), - StorageStyle('ARTIST'), - ASFStorageStyle('Author'), - ) - album = MediaField( - MP3StorageStyle('TALB'), - MP4StorageStyle('\xa9alb'), - StorageStyle('ALBUM'), - ASFStorageStyle('WM/AlbumTitle'), - ) - genres = ListMediaField( - MP3ListStorageStyle('TCON'), - MP4ListStorageStyle('\xa9gen'), - ListStorageStyle('GENRE'), - ASFStorageStyle('WM/Genre'), - ) - genre = genres.single_field() - - lyricist = MediaField( - MP3StorageStyle('TEXT'), - MP4StorageStyle('----:com.apple.iTunes:LYRICIST'), - StorageStyle('LYRICIST'), - ASFStorageStyle('WM/Writer'), - ) - composer = MediaField( - MP3StorageStyle('TCOM'), - MP4StorageStyle('\xa9wrt'), - StorageStyle('COMPOSER'), - ASFStorageStyle('WM/Composer'), - ) - composer_sort = MediaField( - MP3StorageStyle('TSOC'), - MP4StorageStyle('soco'), - StorageStyle('COMPOSERSORT'), - ASFStorageStyle('WM/Composersortorder'), - ) - arranger = MediaField( - MP3PeopleStorageStyle('TIPL', involvement='arranger'), - 
MP4StorageStyle('----:com.apple.iTunes:Arranger'), - StorageStyle('ARRANGER'), - ASFStorageStyle('beets/Arranger'), - ) - - grouping = MediaField( - MP3StorageStyle('TIT1'), - MP4StorageStyle('\xa9grp'), - StorageStyle('GROUPING'), - ASFStorageStyle('WM/ContentGroupDescription'), - ) - track = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=0), - MP4TupleStorageStyle('trkn', index=0), - StorageStyle('TRACK'), - StorageStyle('TRACKNUMBER'), - ASFStorageStyle('WM/TrackNumber'), - out_type=int, - ) - tracktotal = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=1), - MP4TupleStorageStyle('trkn', index=1), - StorageStyle('TRACKTOTAL'), - StorageStyle('TRACKC'), - StorageStyle('TOTALTRACKS'), - ASFStorageStyle('TotalTracks'), - out_type=int, - ) - disc = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=0), - MP4TupleStorageStyle('disk', index=0), - StorageStyle('DISC'), - StorageStyle('DISCNUMBER'), - ASFStorageStyle('WM/PartOfSet'), - out_type=int, - ) - disctotal = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=1), - MP4TupleStorageStyle('disk', index=1), - StorageStyle('DISCTOTAL'), - StorageStyle('DISCC'), - StorageStyle('TOTALDISCS'), - ASFStorageStyle('TotalDiscs'), - out_type=int, - ) - lyrics = MediaField( - MP3DescStorageStyle(key='USLT'), - MP4StorageStyle('\xa9lyr'), - StorageStyle('LYRICS'), - ASFStorageStyle('WM/Lyrics'), - ) - comments = MediaField( - MP3DescStorageStyle(key='COMM'), - MP4StorageStyle('\xa9cmt'), - StorageStyle('DESCRIPTION'), - StorageStyle('COMMENT'), - ASFStorageStyle('WM/Comments'), - ASFStorageStyle('Description') - ) - bpm = MediaField( - MP3StorageStyle('TBPM'), - MP4StorageStyle('tmpo', as_type=int), - StorageStyle('BPM'), - ASFStorageStyle('WM/BeatsPerMinute'), - out_type=int, - ) - comp = MediaField( - MP3StorageStyle('TCMP'), - MP4BoolStorageStyle('cpil'), - StorageStyle('COMPILATION'), - ASFStorageStyle('WM/IsCompilation', as_type=bool), - out_type=bool, - ) - albumartist = MediaField( - 
MP3StorageStyle('TPE2'), - MP4StorageStyle('aART'), - StorageStyle('ALBUM ARTIST'), - StorageStyle('ALBUMARTIST'), - ASFStorageStyle('WM/AlbumArtist'), - ) - albumtype = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Type'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type'), - StorageStyle('MUSICBRAINZ_ALBUMTYPE'), - ASFStorageStyle('MusicBrainz/Album Type'), - ) - label = MediaField( - MP3StorageStyle('TPUB'), - MP4StorageStyle('----:com.apple.iTunes:Label'), - MP4StorageStyle('----:com.apple.iTunes:publisher'), - StorageStyle('LABEL'), - StorageStyle('PUBLISHER'), # Traktor - ASFStorageStyle('WM/Publisher'), - ) - artist_sort = MediaField( - MP3StorageStyle('TSOP'), - MP4StorageStyle('soar'), - StorageStyle('ARTISTSORT'), - ASFStorageStyle('WM/ArtistSortOrder'), - ) - albumartist_sort = MediaField( - MP3DescStorageStyle(u'ALBUMARTISTSORT'), - MP4StorageStyle('soaa'), - StorageStyle('ALBUMARTISTSORT'), - ASFStorageStyle('WM/AlbumArtistSortOrder'), - ) - asin = MediaField( - MP3DescStorageStyle(u'ASIN'), - MP4StorageStyle('----:com.apple.iTunes:ASIN'), - StorageStyle('ASIN'), - ASFStorageStyle('MusicBrainz/ASIN'), - ) - catalognum = MediaField( - MP3DescStorageStyle(u'CATALOGNUMBER'), - MP4StorageStyle('----:com.apple.iTunes:CATALOGNUMBER'), - StorageStyle('CATALOGNUMBER'), - ASFStorageStyle('WM/CatalogNo'), - ) - disctitle = MediaField( - MP3StorageStyle('TSST'), - MP4StorageStyle('----:com.apple.iTunes:DISCSUBTITLE'), - StorageStyle('DISCSUBTITLE'), - ASFStorageStyle('WM/SetSubTitle'), - ) - encoder = MediaField( - MP3StorageStyle('TENC'), - MP4StorageStyle('\xa9too'), - StorageStyle('ENCODEDBY'), - StorageStyle('ENCODER'), - ASFStorageStyle('WM/EncodedBy'), - ) - script = MediaField( - MP3DescStorageStyle(u'Script'), - MP4StorageStyle('----:com.apple.iTunes:SCRIPT'), - StorageStyle('SCRIPT'), - ASFStorageStyle('WM/Script'), - ) - language = MediaField( - MP3StorageStyle('TLAN'), - MP4StorageStyle('----:com.apple.iTunes:LANGUAGE'), - 
StorageStyle('LANGUAGE'), - ASFStorageStyle('WM/Language'), - ) - country = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Release Country'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz ' - 'Album Release Country'), - StorageStyle('RELEASECOUNTRY'), - ASFStorageStyle('MusicBrainz/Album Release Country'), - ) - albumstatus = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Status'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Status'), - StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), - ASFStorageStyle('MusicBrainz/Album Status'), - ) - media = MediaField( - MP3StorageStyle('TMED'), - MP4StorageStyle('----:com.apple.iTunes:MEDIA'), - StorageStyle('MEDIA'), - ASFStorageStyle('WM/Media'), - ) - albumdisambig = MediaField( - # This tag mapping was invented for beets (not used by Picard, etc). - MP3DescStorageStyle(u'MusicBrainz Album Comment'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Comment'), - StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), - ASFStorageStyle('MusicBrainz/Album Comment'), - ) - - # Release date. - date = DateField( - MP3StorageStyle('TDRC'), - MP4StorageStyle('\xa9day'), - StorageStyle('DATE'), - ASFStorageStyle('WM/Year'), - year=(StorageStyle('YEAR'),)) - - year = date.year_field() - month = date.month_field() - day = date.day_field() - - # *Original* release date. - original_date = DateField( - MP3StorageStyle('TDOR'), - MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), - StorageStyle('ORIGINALDATE'), - ASFStorageStyle('WM/OriginalReleaseYear')) - - original_year = original_date.year_field() - original_month = original_date.month_field() - original_day = original_date.day_field() - - # Nonstandard metadata. 
- artist_credit = MediaField( - MP3DescStorageStyle(u'Artist Credit'), - MP4StorageStyle('----:com.apple.iTunes:Artist Credit'), - StorageStyle('ARTIST_CREDIT'), - ASFStorageStyle('beets/Artist Credit'), - ) - albumartist_credit = MediaField( - MP3DescStorageStyle(u'Album Artist Credit'), - MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), - StorageStyle('ALBUMARTIST_CREDIT'), - ASFStorageStyle('beets/Album Artist Credit'), - ) - - # Legacy album art field - art = CoverArtField() - - # Image list - images = ImageListField() - - # MusicBrainz IDs. - mb_trackid = MediaField( - MP3UFIDStorageStyle(owner='http://musicbrainz.org'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id'), - StorageStyle('MUSICBRAINZ_TRACKID'), - ASFStorageStyle('MusicBrainz/Track Id'), - ) - mb_releasetrackid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Release Track Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Track Id'), - StorageStyle('MUSICBRAINZ_RELEASETRACKID'), - ASFStorageStyle('MusicBrainz/Release Track Id'), - ) - mb_albumid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id'), - StorageStyle('MUSICBRAINZ_ALBUMID'), - ASFStorageStyle('MusicBrainz/Album Id'), - ) - mb_artistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id'), - StorageStyle('MUSICBRAINZ_ARTISTID'), - ASFStorageStyle('MusicBrainz/Artist Id'), - ) - mb_albumartistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Artist Id'), - StorageStyle('MUSICBRAINZ_ALBUMARTISTID'), - ASFStorageStyle('MusicBrainz/Album Artist Id'), - ) - mb_releasegroupid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Release Group Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Group Id'), - StorageStyle('MUSICBRAINZ_RELEASEGROUPID'), - 
ASFStorageStyle('MusicBrainz/Release Group Id'), - ) - - # Acoustid fields. - acoustid_fingerprint = MediaField( - MP3DescStorageStyle(u'Acoustid Fingerprint'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Fingerprint'), - StorageStyle('ACOUSTID_FINGERPRINT'), - ASFStorageStyle('Acoustid/Fingerprint'), - ) - acoustid_id = MediaField( - MP3DescStorageStyle(u'Acoustid Id'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Id'), - StorageStyle('ACOUSTID_ID'), - ASFStorageStyle('Acoustid/Id'), - ) - - # ReplayGain fields. - rg_track_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - MP3SoundCheckStorageStyle( - key='COMM', - index=0, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=' dB' - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=0 - ), - StorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_album_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_album_gain', - float_places=2, suffix=' dB' - ), - StorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_track_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_track_peak', - float_places=6 - ), - MP3SoundCheckStorageStyle( - key=u'COMM', - index=1, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - 
'----:com.apple.iTunes:replaygain_track_peak', - float_places=6 - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=1 - ), - StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_track_peak', float_places=6), - out_type=float, - ) - rg_album_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_album_peak', - float_places=6 - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_album_peak', - float_places=6 - ), - StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_album_peak', float_places=6), - out_type=float, - ) - - # EBU R128 fields. - r128_track_gain = MediaField( - MP3DescStorageStyle( - u'R128_TRACK_GAIN' - ), - MP4StorageStyle( - '----:com.apple.iTunes:R128_TRACK_GAIN' - ), - StorageStyle( - u'R128_TRACK_GAIN' - ), - ASFStorageStyle( - u'R128_TRACK_GAIN' - ), - out_type=int, - ) - r128_album_gain = MediaField( - MP3DescStorageStyle( - u'R128_ALBUM_GAIN' - ), - MP4StorageStyle( - '----:com.apple.iTunes:R128_ALBUM_GAIN' - ), - StorageStyle( - u'R128_ALBUM_GAIN' - ), - ASFStorageStyle( - u'R128_ALBUM_GAIN' - ), - out_type=int, - ) - - initial_key = MediaField( - MP3StorageStyle('TKEY'), - MP4StorageStyle('----:com.apple.iTunes:initialkey'), - StorageStyle('INITIALKEY'), - ASFStorageStyle('INITIALKEY'), - ) - - @property - def length(self): - """The duration of the audio in seconds (a float).""" - return self.mgfile.info.length - - @property - def samplerate(self): - """The audio's sample rate (an int).""" - if hasattr(self.mgfile.info, 'sample_rate'): - return self.mgfile.info.sample_rate - elif self.type == 'opus': - # Opus is always 48kHz internally. - return 48000 - return 0 - - @property - def bitdepth(self): - """The number of bits per sample in the audio encoding (an int). - Only available for certain file formats (zero where - unavailable). 
- """ - if hasattr(self.mgfile.info, 'bits_per_sample'): - return self.mgfile.info.bits_per_sample - return 0 - - @property - def channels(self): - """The number of channels in the audio (an int).""" - if hasattr(self.mgfile.info, 'channels'): - return self.mgfile.info.channels - return 0 - - @property - def bitrate(self): - """The number of bits per seconds used in the audio coding (an - int). If this is provided explicitly by the compressed file - format, this is a precise reflection of the encoding. Otherwise, - it is estimated from the on-disk file size. In this case, some - imprecision is possible because the file header is incorporated - in the file size. - """ - if hasattr(self.mgfile.info, 'bitrate') and self.mgfile.info.bitrate: - # Many formats provide it explicitly. - return self.mgfile.info.bitrate - else: - # Otherwise, we calculate bitrate from the file size. (This - # is the case for all of the lossless formats.) - if not self.length: - # Avoid division by zero if length is not available. - return 0 - size = os.path.getsize(self.path) - return int(size * 8 / self.length) - - @property - def format(self): - """A string describing the file format/codec.""" - return TYPES[self.type] +del key, value, warnings, mediafile diff --git a/libs/common/beets/plugins.py b/libs/common/beets/plugins.py index 1bd2cacd..ed1f82d8 100644 --- a/libs/common/beets/plugins.py +++ b/libs/common/beets/plugins.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. 
# @@ -15,19 +14,19 @@ """Support for beets plugins.""" -from __future__ import division, absolute_import, print_function -import inspect import traceback import re +import inspect +import abc from collections import defaultdict from functools import wraps import beets from beets import logging -from beets import mediafile -import six +import mediafile + PLUGIN_NAMESPACE = 'beetsplug' @@ -50,26 +49,28 @@ class PluginLogFilter(logging.Filter): """A logging filter that identifies the plugin that emitted a log message. """ + def __init__(self, plugin): - self.prefix = u'{0}: '.format(plugin.name) + self.prefix = f'{plugin.name}: ' def filter(self, record): if hasattr(record.msg, 'msg') and isinstance(record.msg.msg, - six.string_types): + str): # A _LogMessage from our hacked-up Logging replacement. record.msg.msg = self.prefix + record.msg.msg - elif isinstance(record.msg, six.string_types): + elif isinstance(record.msg, str): record.msg = self.prefix + record.msg return True # Managing the plugins themselves. -class BeetsPlugin(object): +class BeetsPlugin: """The base class for all beets plugins. Plugins provide functionality by defining a subclass of BeetsPlugin and overriding the abstract methods defined here. """ + def __init__(self, name=None): """Perform one-time plugin setup. """ @@ -127,27 +128,24 @@ class BeetsPlugin(object): value after the function returns). Also determines which params may not be sent for backwards-compatibility. 
""" - argspec = inspect.getargspec(func) + argspec = inspect.getfullargspec(func) @wraps(func) def wrapper(*args, **kwargs): assert self._log.level == logging.NOTSET + verbosity = beets.config['verbose'].get(int) log_level = max(logging.DEBUG, base_log_level - 10 * verbosity) self._log.setLevel(log_level) + if argspec.varkw is None: + kwargs = {k: v for k, v in kwargs.items() + if k in argspec.args} + try: - try: - return func(*args, **kwargs) - except TypeError as exc: - if exc.args[0].startswith(func.__name__): - # caused by 'func' and not stuff internal to 'func' - kwargs = dict((arg, val) for arg, val in kwargs.items() - if arg in argspec.args) - return func(*args, **kwargs) - else: - raise + return func(*args, **kwargs) finally: self._log.setLevel(logging.NOTSET) + return wrapper def queries(self): @@ -167,7 +165,7 @@ class BeetsPlugin(object): """ return beets.autotag.hooks.Distance() - def candidates(self, items, artist, album, va_likely): + def candidates(self, items, artist, album, va_likely, extra_tags=None): """Should return a sequence of AlbumInfo objects that match the album whose items are provided. """ @@ -201,7 +199,7 @@ class BeetsPlugin(object): ``descriptor`` must be an instance of ``mediafile.MediaField``. """ - # Defer impor to prevent circular dependency + # Defer import to prevent circular dependency from beets import library mediafile.MediaFile.add_field(name, descriptor) library.Item._media_fields.add(name) @@ -264,14 +262,14 @@ def load_plugins(names=()): BeetsPlugin subclasses desired. 
""" for name in names: - modname = '{0}.{1}'.format(PLUGIN_NAMESPACE, name) + modname = f'{PLUGIN_NAMESPACE}.{name}' try: try: namespace = __import__(modname, None, None) except ImportError as exc: # Again, this is hacky: if exc.args[0].endswith(' ' + name): - log.warning(u'** plugin {0} not found', name) + log.warning('** plugin {0} not found', name) else: raise else: @@ -282,7 +280,7 @@ def load_plugins(names=()): except Exception: log.warning( - u'** error loading plugin {}:\n{}', + '** error loading plugin {}:\n{}', name, traceback.format_exc(), ) @@ -296,6 +294,11 @@ def find_plugins(): currently loaded beets plugins. Loads the default plugin set first. """ + if _instances: + # After the first call, use cached instances for performance reasons. + # See https://github.com/beetbox/beets/pull/3810 + return list(_instances.values()) + load_plugins() plugins = [] for cls in _classes: @@ -329,21 +332,31 @@ def queries(): def types(model_cls): # Gives us `item_types` and `album_types` - attr_name = '{0}_types'.format(model_cls.__name__.lower()) + attr_name = f'{model_cls.__name__.lower()}_types' types = {} for plugin in find_plugins(): plugin_types = getattr(plugin, attr_name, {}) for field in plugin_types: if field in types and plugin_types[field] != types[field]: raise PluginConflictException( - u'Plugin {0} defines flexible field {1} ' - u'which has already been defined with ' - u'another type.'.format(plugin.name, field) + 'Plugin {} defines flexible field {} ' + 'which has already been defined with ' + 'another type.'.format(plugin.name, field) ) types.update(plugin_types) return types +def named_queries(model_cls): + # Gather `item_queries` and `album_queries` from the plugins. 
+ attr_name = f'{model_cls.__name__.lower()}_queries' + queries = {} + for plugin in find_plugins(): + plugin_queries = getattr(plugin, attr_name, {}) + queries.update(plugin_queries) + return queries + + def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. Returns a Distance object. @@ -364,20 +377,19 @@ def album_distance(items, album_info, mapping): return dist -def candidates(items, artist, album, va_likely): +def candidates(items, artist, album, va_likely, extra_tags=None): """Gets MusicBrainz candidates for an album from each plugin. """ for plugin in find_plugins(): - for candidate in plugin.candidates(items, artist, album, va_likely): - yield candidate + yield from plugin.candidates(items, artist, album, va_likely, + extra_tags) def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ for plugin in find_plugins(): - for item_candidate in plugin.item_candidates(item, artist, title): - yield item_candidate + yield from plugin.item_candidates(item, artist, title) def album_for_id(album_id): @@ -470,7 +482,7 @@ def send(event, **arguments): Return a list of non-None values returned from the handlers. 
""" - log.debug(u'Sending event: {0}', event) + log.debug('Sending event: {0}', event) results = [] for handler in event_handlers()[event]: result = handler(**arguments) @@ -488,7 +500,7 @@ def feat_tokens(for_artist=True): feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.'] if for_artist: feat_words += ['with', 'vs', 'and', 'con', '&'] - return '(?<=\s)(?:{0})(?=\s)'.format( + return r'(?<=\s)(?:{})(?=\s)'.format( '|'.join(re.escape(x) for x in feat_words) ) @@ -513,7 +525,7 @@ def sanitize_choices(choices, choices_all): def sanitize_pairs(pairs, pairs_all): """Clean up a single-element mapping configuration attribute as returned - by `confit`'s `Pairs` template: keep only two-element tuples present in + by Confuse's `Pairs` template: keep only two-element tuples present in pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*') wildcards while keeping the original order. Note that ('*', '*') and ('*', 'whatever') have the same effect. @@ -563,3 +575,188 @@ def notify_info_yielded(event): yield v return decorated return decorator + + +def get_distance(config, data_source, info): + """Returns the ``data_source`` weight and the maximum source weight + for albums or individual tracks. + """ + dist = beets.autotag.Distance() + if info.data_source == data_source: + dist.add('source', config['source_weight'].as_number()) + return dist + + +def apply_item_changes(lib, item, move, pretend, write): + """Store, move, and write the item according to the arguments. + + :param lib: beets library. + :type lib: beets.library.Library + :param item: Item whose changes to apply. + :type item: beets.library.Item + :param move: Move the item if it's in the library. + :type move: bool + :param pretend: Return without moving, writing, or storing the item's + metadata. + :type pretend: bool + :param write: Write the item's metadata to its media file. 
+ :type write: bool + """ + if pretend: + return + + from beets import util + + # Move the item if it's in the library. + if move and lib.directory in util.ancestry(item.path): + item.move(with_album=False) + + if write: + item.try_write() + + item.store() + + +class MetadataSourcePlugin(metaclass=abc.ABCMeta): + def __init__(self): + super().__init__() + self.config.add({'source_weight': 0.5}) + + @abc.abstractproperty + def id_regex(self): + raise NotImplementedError + + @abc.abstractproperty + def data_source(self): + raise NotImplementedError + + @abc.abstractproperty + def search_url(self): + raise NotImplementedError + + @abc.abstractproperty + def album_url(self): + raise NotImplementedError + + @abc.abstractproperty + def track_url(self): + raise NotImplementedError + + @abc.abstractmethod + def _search_api(self, query_type, filters, keywords=''): + raise NotImplementedError + + @abc.abstractmethod + def album_for_id(self, album_id): + raise NotImplementedError + + @abc.abstractmethod + def track_for_id(self, track_id=None, track_data=None): + raise NotImplementedError + + @staticmethod + def get_artist(artists, id_key='id', name_key='name'): + """Returns an artist string (all artists) and an artist_id (the main + artist) for a list of artist object dicts. + + For each artist, this function moves articles (such as 'a', 'an', + and 'the') to the front and strips trailing disambiguation numbers. It + returns a tuple containing the comma-separated string of all + normalized artists and the ``id`` of the main/first artist. + + :param artists: Iterable of artist dicts or lists returned by API. + :type artists: list[dict] or list[list] + :param id_key: Key or index corresponding to the value of ``id`` for + the main/first artist. Defaults to 'id'. + :type id_key: str or int + :param name_key: Key or index corresponding to values of names + to concatenate for the artist string (containing all artists). + Defaults to 'name'. 
+ :type name_key: str or int + :return: Normalized artist string. + :rtype: str + """ + artist_id = None + artist_names = [] + for artist in artists: + if not artist_id: + artist_id = artist[id_key] + name = artist[name_key] + # Strip disambiguation number. + name = re.sub(r' \(\d+\)$', '', name) + # Move articles to the front. + name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I) + artist_names.append(name) + artist = ', '.join(artist_names).replace(' ,', ',') or None + return artist, artist_id + + def _get_id(self, url_type, id_): + """Parse an ID from its URL if necessary. + + :param url_type: Type of URL. Either 'album' or 'track'. + :type url_type: str + :param id_: Album/track ID or URL. + :type id_: str + :return: Album/track ID. + :rtype: str + """ + self._log.debug( + "Searching {} for {} '{}'", self.data_source, url_type, id_ + ) + match = re.search(self.id_regex['pattern'].format(url_type), str(id_)) + if match: + id_ = match.group(self.id_regex['match_group']) + if id_: + return id_ + return None + + def candidates(self, items, artist, album, va_likely, extra_tags=None): + """Returns a list of AlbumInfo objects for Search API results + matching an ``album`` and ``artist`` (if not various). + + :param items: List of items comprised by an album to be matched. + :type items: list[beets.library.Item] + :param artist: The artist of the album to be matched. + :type artist: str + :param album: The name of the album to be matched. + :type album: str + :param va_likely: True if the album to be matched likely has + Various Artists. + :type va_likely: bool + :return: Candidate AlbumInfo objects. 
+ :rtype: list[beets.autotag.hooks.AlbumInfo] + """ + query_filters = {'album': album} + if not va_likely: + query_filters['artist'] = artist + results = self._search_api(query_type='album', filters=query_filters) + albums = [self.album_for_id(album_id=r['id']) for r in results] + return [a for a in albums if a is not None] + + def item_candidates(self, item, artist, title): + """Returns a list of TrackInfo objects for Search API results + matching ``title`` and ``artist``. + + :param item: Singleton item to be matched. + :type item: beets.library.Item + :param artist: The artist of the track to be matched. + :type artist: str + :param title: The title of the track to be matched. + :type title: str + :return: Candidate TrackInfo objects. + :rtype: list[beets.autotag.hooks.TrackInfo] + """ + tracks = self._search_api( + query_type='track', keywords=title, filters={'artist': artist} + ) + return [self.track_for_id(track_data=track) for track in tracks] + + def album_distance(self, items, album_info, mapping): + return get_distance( + data_source=self.data_source, info=album_info, config=self.config + ) + + def track_distance(self, item, track_info): + return get_distance( + data_source=self.data_source, info=track_info, config=self.config + ) diff --git a/libs/common/beets/random.py b/libs/common/beets/random.py new file mode 100644 index 00000000..eb4f55af --- /dev/null +++ b/libs/common/beets/random.py @@ -0,0 +1,113 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Get a random song or album from the library. +""" + +import random +from operator import attrgetter +from itertools import groupby + + +def _length(obj, album): + """Get the duration of an item or album. + """ + if album: + return sum(i.length for i in obj.items()) + else: + return obj.length + + +def _equal_chance_permutation(objs, field='albumartist', random_gen=None): + """Generate (lazily) a permutation of the objects where every group + with equal values for `field` have an equal chance of appearing in + any given position. + """ + rand = random_gen or random + + # Group the objects by artist so we can sample from them. + key = attrgetter(field) + objs.sort(key=key) + objs_by_artists = {} + for artist, v in groupby(objs, key): + objs_by_artists[artist] = list(v) + + # While we still have artists with music to choose from, pick one + # randomly and pick a track from that artist. + while objs_by_artists: + # Choose an artist and an object for that artist, removing + # this choice from the pool. + artist = rand.choice(list(objs_by_artists.keys())) + objs_from_artist = objs_by_artists[artist] + i = rand.randint(0, len(objs_from_artist) - 1) + yield objs_from_artist.pop(i) + + # Remove the artist if we've used up all of its objects. 
+ if not objs_from_artist: + del objs_by_artists[artist] + + +def _take(iter, num): + """Return a list containing the first `num` values in `iter` (or + fewer, if the iterable ends early). + """ + out = [] + for val in iter: + out.append(val) + num -= 1 + if num <= 0: + break + return out + + +def _take_time(iter, secs, album): + """Return a list containing the first values in `iter`, which should + be Item or Album objects, that add up to the given amount of time in + seconds. + """ + out = [] + total_time = 0.0 + for obj in iter: + length = _length(obj, album) + if total_time + length <= secs: + out.append(obj) + total_time += length + return out + + +def random_objs(objs, album, number=1, time=None, equal_chance=False, + random_gen=None): + """Get a random subset of the provided `objs`. + + If `number` is provided, produce that many matches. Otherwise, if + `time` is provided, instead select a list whose total time is close + to that number of minutes. If `equal_chance` is true, give each + artist an equal chance of being included so that artists with more + songs are not represented disproportionately. + """ + rand = random_gen or random + + # Permute the objects either in a straightforward way or an + # artist-balanced way. + if equal_chance: + perm = _equal_chance_permutation(objs) + else: + perm = objs + rand.shuffle(perm) # N.B. This shuffles the original list. + + # Select objects by time our count. + if time: + return _take_time(perm, time * 60, album) + else: + return _take(perm, number) diff --git a/libs/common/beets/ui/__init__.py b/libs/common/beets/ui/__init__.py index af2b79a1..121cb5dc 100644 --- a/libs/common/beets/ui/__init__.py +++ b/libs/common/beets/ui/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -18,7 +17,6 @@ interface. To invoke the CLI, just call beets.ui.main(). The actual CLI commands are implemented in the ui.commands module. 
""" -from __future__ import division, absolute_import, print_function import optparse import textwrap @@ -30,19 +28,18 @@ import re import struct import traceback import os.path -from six.moves import input from beets import logging from beets import library from beets import plugins from beets import util -from beets.util.functemplate import Template +from beets.util.functemplate import template from beets import config -from beets.util import confit, as_string +from beets.util import as_string from beets.autotag import mb from beets.dbcore import query as db_query from beets.dbcore import db -import six +import confuse # On Windows platforms, use colorama to support "ANSI" terminal colors. if sys.platform == 'win32': @@ -61,8 +58,8 @@ log.propagate = False # Don't propagate to root handler. PF_KEY_QUERIES = { - 'comp': u'comp:true', - 'singleton': u'singleton:true', + 'comp': 'comp:true', + 'singleton': 'singleton:true', } @@ -112,10 +109,7 @@ def decargs(arglist): """Given a list of command-line argument bytestrings, attempts to decode them to Unicode strings when running under Python 2. """ - if six.PY2: - return [s.decode(util.arg_encoding()) for s in arglist] - else: - return arglist + return arglist def print_(*strings, **kwargs): @@ -130,30 +124,25 @@ def print_(*strings, **kwargs): (it defaults to a newline). """ if not strings: - strings = [u''] - assert isinstance(strings[0], six.text_type) + strings = [''] + assert isinstance(strings[0], str) - txt = u' '.join(strings) - txt += kwargs.get('end', u'\n') + txt = ' '.join(strings) + txt += kwargs.get('end', '\n') # Encode the string and write it to stdout. - if six.PY2: - # On Python 2, sys.stdout expects bytes. + # On Python 3, sys.stdout expects text strings and uses the + # exception-throwing encoding error policy. To avoid throwing + # errors and use our configurable encoding override, we use the + # underlying bytes buffer instead. 
+ if hasattr(sys.stdout, 'buffer'): out = txt.encode(_out_encoding(), 'replace') - sys.stdout.write(out) + sys.stdout.buffer.write(out) + sys.stdout.buffer.flush() else: - # On Python 3, sys.stdout expects text strings and uses the - # exception-throwing encoding error policy. To avoid throwing - # errors and use our configurable encoding override, we use the - # underlying bytes buffer instead. - if hasattr(sys.stdout, 'buffer'): - out = txt.encode(_out_encoding(), 'replace') - sys.stdout.buffer.write(out) - sys.stdout.buffer.flush() - else: - # In our test harnesses (e.g., DummyOut), sys.stdout.buffer - # does not exist. We instead just record the text string. - sys.stdout.write(txt) + # In our test harnesses (e.g., DummyOut), sys.stdout.buffer + # does not exist. We instead just record the text string. + sys.stdout.write(txt) # Configuration wrappers. @@ -203,19 +192,16 @@ def input_(prompt=None): """ # raw_input incorrectly sends prompts to stderr, not stdout, so we # use print_() explicitly to display prompts. - # http://bugs.python.org/issue1927 + # https://bugs.python.org/issue1927 if prompt: - print_(prompt, end=u' ') + print_(prompt, end=' ') try: resp = input() except EOFError: - raise UserError(u'stdin stream ended while input required') + raise UserError('stdin stream ended while input required') - if six.PY2: - return resp.decode(_in_encoding(), 'ignore') - else: - return resp + return resp def input_options(options, require=False, prompt=None, fallback_prompt=None, @@ -259,7 +245,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, found_letter = letter break else: - raise ValueError(u'no unambiguous lettering found') + raise ValueError('no unambiguous lettering found') letters[found_letter.lower()] = option index = option.index(found_letter) @@ -267,7 +253,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Mark the option's shortcut letter for display. 
if not require and ( (default is None and not numrange and first) or - (isinstance(default, six.string_types) and + (isinstance(default, str) and found_letter.lower() == default.lower())): # The first option is the default; mark it. show_letter = '[%s]' % found_letter.upper() @@ -303,11 +289,11 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, prompt_part_lengths = [] if numrange: if isinstance(default, int): - default_name = six.text_type(default) + default_name = str(default) default_name = colorize('action_default', default_name) tmpl = '# selection (default %s)' prompt_parts.append(tmpl % default_name) - prompt_part_lengths.append(len(tmpl % six.text_type(default))) + prompt_part_lengths.append(len(tmpl % str(default))) else: prompt_parts.append('# selection') prompt_part_lengths.append(len(prompt_parts[-1])) @@ -342,9 +328,9 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Make a fallback prompt too. This is displayed if the user enters # something that is not recognized. if not fallback_prompt: - fallback_prompt = u'Enter one of ' + fallback_prompt = 'Enter one of ' if numrange: - fallback_prompt += u'%i-%i, ' % numrange + fallback_prompt += '%i-%i, ' % numrange fallback_prompt += ', '.join(display_letters) + ':' resp = input_(prompt) @@ -383,34 +369,41 @@ def input_yn(prompt, require=False): "yes" unless `require` is `True`, in which case there is no default. """ sel = input_options( - ('y', 'n'), require, prompt, u'Enter Y or N:' + ('y', 'n'), require, prompt, 'Enter Y or N:' ) - return sel == u'y' + return sel == 'y' -def input_select_objects(prompt, objs, rep): +def input_select_objects(prompt, objs, rep, prompt_all=None): """Prompt to user to choose all, none, or some of the given objects. Return the list of selected objects. `prompt` is the prompt string to use for each question (it should be - phrased as an imperative verb). 
`rep` is a function to call on each - object to print it out when confirming objects individually. + phrased as an imperative verb). If `prompt_all` is given, it is used + instead of `prompt` for the first (yes(/no/select) question. + `rep` is a function to call on each object to print it out when confirming + objects individually. """ choice = input_options( - (u'y', u'n', u's'), False, - u'%s? (Yes/no/select)' % prompt) + ('y', 'n', 's'), False, + '%s? (Yes/no/select)' % (prompt_all or prompt)) print() # Blank line. - if choice == u'y': # Yes. + if choice == 'y': # Yes. return objs - elif choice == u's': # Select. + elif choice == 's': # Select. out = [] for obj in objs: rep(obj) - if input_yn(u'%s? (yes/no)' % prompt, True): + answer = input_options( + ('y', 'n', 'q'), True, '%s? (yes/no/quit)' % prompt, + 'Enter Y or N:' + ) + if answer == 'y': out.append(obj) - print() # go to a new line + elif answer == 'q': + return out return out else: # No. @@ -421,14 +414,14 @@ def input_select_objects(prompt, objs, rep): def human_bytes(size): """Formats size, a number of bytes, in a human-readable way.""" - powers = [u'', u'K', u'M', u'G', u'T', u'P', u'E', u'Z', u'Y', u'H'] + powers = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y', 'H'] unit = 'B' for power in powers: if size < 1024: - return u"%3.1f %s%s" % (size, power, unit) + return f"{size:3.1f} {power}{unit}" size /= 1024.0 - unit = u'iB' - return u"big" + unit = 'iB' + return "big" def human_seconds(interval): @@ -436,13 +429,13 @@ def human_seconds(interval): interval using English words. 
""" units = [ - (1, u'second'), - (60, u'minute'), - (60, u'hour'), - (24, u'day'), - (7, u'week'), - (52, u'year'), - (10, u'decade'), + (1, 'second'), + (60, 'minute'), + (60, 'hour'), + (24, 'day'), + (7, 'week'), + (52, 'year'), + (10, 'decade'), ] for i in range(len(units) - 1): increment, suffix = units[i] @@ -455,7 +448,7 @@ def human_seconds(interval): increment, suffix = units[-1] interval /= float(increment) - return u"%3.1f %ss" % (interval, suffix) + return f"{interval:3.1f} {suffix}s" def human_seconds_short(interval): @@ -463,13 +456,13 @@ def human_seconds_short(interval): string. """ interval = int(interval) - return u'%i:%02i' % (interval // 60, interval % 60) + return '%i:%02i' % (interval // 60, interval % 60) # Colorization. # ANSI terminal colorization code heavily inspired by pygments: -# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py +# https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/console.py # (pygments is by Tim Hatch, Armin Ronacher, et al.) COLOR_ESCAPE = "\x1b[" DARK_COLORS = { @@ -516,7 +509,7 @@ def _colorize(color, text): elif color in LIGHT_COLORS: escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS[color] + 30) else: - raise ValueError(u'no such color %s', color) + raise ValueError('no such color %s', color) return escape + text + RESET_COLOR @@ -524,22 +517,22 @@ def colorize(color_name, text): """Colorize text if colored output is enabled. (Like _colorize but conditional.) 
""" - if config['ui']['color']: - global COLORS - if not COLORS: - COLORS = dict((name, - config['ui']['colors'][name].as_str()) - for name in COLOR_NAMES) - # In case a 3rd party plugin is still passing the actual color ('red') - # instead of the abstract color name ('text_error') - color = COLORS.get(color_name) - if not color: - log.debug(u'Invalid color_name: {0}', color_name) - color = color_name - return _colorize(color, text) - else: + if not config['ui']['color'] or 'NO_COLOR' in os.environ.keys(): return text + global COLORS + if not COLORS: + COLORS = {name: + config['ui']['colors'][name].as_str() + for name in COLOR_NAMES} + # In case a 3rd party plugin is still passing the actual color ('red') + # instead of the abstract color name ('text_error') + color = COLORS.get(color_name) + if not color: + log.debug('Invalid color_name: {0}', color_name) + color = color_name + return _colorize(color, text) + def _colordiff(a, b, highlight='text_highlight', minor_highlight='text_highlight_minor'): @@ -548,11 +541,11 @@ def _colordiff(a, b, highlight='text_highlight', highlighted intelligently to show differences; other values are stringified and highlighted in their entirety. """ - if not isinstance(a, six.string_types) \ - or not isinstance(b, six.string_types): + if not isinstance(a, str) \ + or not isinstance(b, str): # Non-strings: use ordinary equality. 
- a = six.text_type(a) - b = six.text_type(b) + a = str(a) + b = str(b) if a == b: return a, b else: @@ -590,7 +583,7 @@ def _colordiff(a, b, highlight='text_highlight', else: assert(False) - return u''.join(a_out), u''.join(b_out) + return ''.join(a_out), ''.join(b_out) def colordiff(a, b, highlight='text_highlight'): @@ -600,7 +593,7 @@ def colordiff(a, b, highlight='text_highlight'): if config['ui']['color']: return _colordiff(a, b, highlight) else: - return six.text_type(a), six.text_type(b) + return str(a), str(b) def get_path_formats(subview=None): @@ -611,12 +604,12 @@ def get_path_formats(subview=None): subview = subview or config['paths'] for query, view in subview.items(): query = PF_KEY_QUERIES.get(query, query) # Expand common queries. - path_formats.append((query, Template(view.as_str()))) + path_formats.append((query, template(view.as_str()))) return path_formats def get_replacements(): - """Confit validation function that reads regex/string pairs. + """Confuse validation function that reads regex/string pairs. """ replacements = [] for pattern, repl in config['replace'].get(dict).items(): @@ -625,7 +618,7 @@ def get_replacements(): replacements.append((re.compile(pattern), repl)) except re.error: raise UserError( - u'malformed regular expression in replace: {0}'.format( + 'malformed regular expression in replace: {}'.format( pattern ) ) @@ -646,7 +639,7 @@ def term_width(): try: buf = fcntl.ioctl(0, termios.TIOCGWINSZ, ' ' * 4) - except IOError: + except OSError: return fallback try: height, width = struct.unpack('hh', buf) @@ -658,10 +651,10 @@ def term_width(): FLOAT_EPSILON = 0.01 -def _field_diff(field, old, new): - """Given two Model objects, format their values for `field` and - highlight changes among them. Return a human-readable string. If the - value has not changed, return None instead. 
+def _field_diff(field, old, old_fmt, new, new_fmt): + """Given two Model objects and their formatted views, format their values + for `field` and highlight changes among them. Return a human-readable + string. If the value has not changed, return None instead. """ oldval = old.get(field) newval = new.get(field) @@ -674,18 +667,18 @@ def _field_diff(field, old, new): return None # Get formatted values for output. - oldstr = old.formatted().get(field, u'') - newstr = new.formatted().get(field, u'') + oldstr = old_fmt.get(field, '') + newstr = new_fmt.get(field, '') # For strings, highlight changes. For others, colorize the whole # thing. - if isinstance(oldval, six.string_types): + if isinstance(oldval, str): oldstr, newstr = colordiff(oldval, newstr) else: oldstr = colorize('text_error', oldstr) newstr = colorize('text_error', newstr) - return u'{0} -> {1}'.format(oldstr, newstr) + return f'{oldstr} -> {newstr}' def show_model_changes(new, old=None, fields=None, always=False): @@ -700,6 +693,11 @@ def show_model_changes(new, old=None, fields=None, always=False): """ old = old or new._db._get(type(new), new.id) + # Keep the formatted views around instead of re-creating them in each + # iteration step + old_fmt = old.formatted() + new_fmt = new.formatted() + # Build up lines showing changed fields. changes = [] for field in old: @@ -708,25 +706,25 @@ def show_model_changes(new, old=None, fields=None, always=False): continue # Detect and show difference for this field. - line = _field_diff(field, old, new) + line = _field_diff(field, old, old_fmt, new, new_fmt) if line: - changes.append(u' {0}: {1}'.format(field, line)) + changes.append(f' {field}: {line}') # New fields. for field in set(new) - set(old): if fields and field not in fields: continue - changes.append(u' {0}: {1}'.format( + changes.append(' {}: {}'.format( field, - colorize('text_highlight', new.formatted()[field]) + colorize('text_highlight', new_fmt[field]) )) # Print changes. 
if changes or always: print_(format(old)) if changes: - print_(u'\n'.join(changes)) + print_('\n'.join(changes)) return bool(changes) @@ -759,15 +757,21 @@ def show_path_changes(path_changes): if max_width > col_width: # Print every change over two lines for source, dest in zip(sources, destinations): - log.info(u'{0} \n -> {1}', source, dest) + color_source, color_dest = colordiff(source, dest) + print_('{0} \n -> {1}'.format(color_source, color_dest)) else: # Print every change on a single line, and add a header title_pad = max_width - len('Source ') + len(' -> ') - log.info(u'Source {0} Destination', ' ' * title_pad) + print_('Source {0} Destination'.format(' ' * title_pad)) for source, dest in zip(sources, destinations): pad = max_width - len(source) - log.info(u'{0} {1} -> {2}', source, ' ' * pad, dest) + color_source, color_dest = colordiff(source, dest) + print_('{0} {1} -> {2}'.format( + color_source, + ' ' * pad, + color_dest, + )) # Helper functions for option parsing. @@ -783,22 +787,25 @@ def _store_dict(option, opt_str, value, parser): if option_values is None: # This is the first supplied ``key=value`` pair of option. # Initialize empty dictionary and get a reference to it. - setattr(parser.values, dest, dict()) + setattr(parser.values, dest, {}) option_values = getattr(parser.values, dest) + # Decode the argument using the platform's argument encoding. + value = util.text_string(value, util.arg_encoding()) + try: - key, value = map(lambda s: util.text_string(s), value.split('=')) + key, value = value.split('=', 1) if not (key and value): raise ValueError except ValueError: raise UserError( - "supplied argument `{0}' is not of the form `key=value'" + "supplied argument `{}' is not of the form `key=value'" .format(value)) option_values[key] = value -class CommonOptionsParser(optparse.OptionParser, object): +class CommonOptionsParser(optparse.OptionParser): """Offers a simple way to add common formatting options. 
Options available include: @@ -813,8 +820,9 @@ class CommonOptionsParser(optparse.OptionParser, object): Each method is fully documented in the related method. """ + def __init__(self, *args, **kwargs): - super(CommonOptionsParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._album_flags = False # this serves both as an indicator that we offer the feature AND allows # us to check whether it has been specified on the CLI - bypassing the @@ -828,7 +836,7 @@ class CommonOptionsParser(optparse.OptionParser, object): Sets the album property on the options extracted from the CLI. """ album = optparse.Option(*flags, action='store_true', - help=u'match albums instead of tracks') + help='match albums instead of tracks') self.add_option(album) self._album_flags = set(flags) @@ -846,7 +854,7 @@ class CommonOptionsParser(optparse.OptionParser, object): elif value: value, = decargs([value]) else: - value = u'' + value = '' parser.values.format = value if target: @@ -873,14 +881,14 @@ class CommonOptionsParser(optparse.OptionParser, object): By default this affects both items and albums. If add_album_option() is used then the target will be autodetected. - Sets the format property to u'$path' on the options extracted from the + Sets the format property to '$path' on the options extracted from the CLI. 
""" path = optparse.Option(*flags, nargs=0, action='callback', callback=self._set_format, - callback_kwargs={'fmt': u'$path', + callback_kwargs={'fmt': '$path', 'store_true': True}, - help=u'print paths for matched items or albums') + help='print paths for matched items or albums') self.add_option(path) def add_format_option(self, flags=('-f', '--format'), target=None): @@ -900,7 +908,7 @@ class CommonOptionsParser(optparse.OptionParser, object): """ kwargs = {} if target: - if isinstance(target, six.string_types): + if isinstance(target, str): target = {'item': library.Item, 'album': library.Album}[target] kwargs['target'] = target @@ -908,7 +916,7 @@ class CommonOptionsParser(optparse.OptionParser, object): opt = optparse.Option(*flags, action='callback', callback=self._set_format, callback_kwargs=kwargs, - help=u'print with custom format') + help='print with custom format') self.add_option(opt) def add_all_common_options(self): @@ -923,14 +931,15 @@ class CommonOptionsParser(optparse.OptionParser, object): # # This is a fairly generic subcommand parser for optparse. It is # maintained externally here: -# http://gist.github.com/462717 +# https://gist.github.com/462717 # There you will also find a better description of the code and a more # succinct example program. -class Subcommand(object): +class Subcommand: """A subcommand of a root command-line application that may be invoked by a SubcommandOptionParser. """ + def __init__(self, name, parser=None, help='', aliases=(), hide=False): """Creates a new subcommand. name is the primary way to invoke the subcommand; aliases are alternate names. 
parser is an @@ -958,7 +967,7 @@ class Subcommand(object): @root_parser.setter def root_parser(self, root_parser): self._root_parser = root_parser - self.parser.prog = '{0} {1}'.format( + self.parser.prog = '{} {}'.format( as_string(root_parser.get_prog_name()), self.name) @@ -974,13 +983,13 @@ class SubcommandsOptionParser(CommonOptionsParser): """ # A more helpful default usage. if 'usage' not in kwargs: - kwargs['usage'] = u""" + kwargs['usage'] = """ %prog COMMAND [ARGS...] %prog help COMMAND""" kwargs['add_help_option'] = False # Super constructor. - super(SubcommandsOptionParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Our root parser needs to stop on the first unrecognized argument. self.disable_interspersed_args() @@ -997,7 +1006,7 @@ class SubcommandsOptionParser(CommonOptionsParser): # Add the list of subcommands to the help message. def format_help(self, formatter=None): # Get the original help message, to which we will append. - out = super(SubcommandsOptionParser, self).format_help(formatter) + out = super().format_help(formatter) if formatter is None: formatter = self.formatter @@ -1083,7 +1092,7 @@ class SubcommandsOptionParser(CommonOptionsParser): cmdname = args.pop(0) subcommand = self._subcommand_for_name(cmdname) if not subcommand: - raise UserError(u"unknown command '{0}'".format(cmdname)) + raise UserError(f"unknown command '{cmdname}'") suboptions, subargs = subcommand.parse_args(args) return subcommand, suboptions, subargs @@ -1094,26 +1103,32 @@ optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) # The main entry point and bootstrapping. -def _load_plugins(config): - """Load the plugins specified in the configuration. +def _load_plugins(options, config): + """Load the plugins specified on the command line or in the configuration. 
""" paths = config['pluginpath'].as_str_seq(split=False) paths = [util.normpath(p) for p in paths] - log.debug(u'plugin paths: {0}', util.displayable_path(paths)) + log.debug('plugin paths: {0}', util.displayable_path(paths)) # On Python 3, the search paths need to be unicode. paths = [util.py3_path(p) for p in paths] # Extend the `beetsplug` package to include the plugin paths. import beetsplug - beetsplug.__path__ = paths + beetsplug.__path__ + beetsplug.__path__ = paths + list(beetsplug.__path__) # For backwards compatibility, also support plugin paths that # *contain* a `beetsplug` package. sys.path += paths - plugins.load_plugins(config['plugins'].as_str_seq()) - plugins.send("pluginload") + # If we were given any plugins on the command line, use those. + if options.plugins is not None: + plugin_list = (options.plugins.split(',') + if len(options.plugins) > 0 else []) + else: + plugin_list = config['plugins'].as_str_seq() + + plugins.load_plugins(plugin_list) return plugins @@ -1127,7 +1142,20 @@ def _setup(options, lib=None): config = _configure(options) - plugins = _load_plugins(config) + plugins = _load_plugins(options, config) + + # Add types and queries defined by plugins. + plugin_types_album = plugins.types(library.Album) + library.Album._types.update(plugin_types_album) + item_types = plugin_types_album.copy() + item_types.update(library.Item._types) + item_types.update(plugins.types(library.Item)) + library.Item._types = item_types + + library.Item._queries.update(plugins.named_queries(library.Item)) + library.Album._queries.update(plugins.named_queries(library.Album)) + + plugins.send("pluginload") # Get the default subcommands. 
from beets.ui.commands import default_commands @@ -1138,8 +1166,6 @@ def _setup(options, lib=None): if lib is None: lib = _open_library(config) plugins.send("library_opened", lib=lib) - library.Item._types.update(plugins.types(library.Item)) - library.Album._types.update(plugins.types(library.Album)) return subcommands, plugins, lib @@ -1165,18 +1191,18 @@ def _configure(options): log.set_global_level(logging.INFO) if overlay_path: - log.debug(u'overlaying configuration: {0}', + log.debug('overlaying configuration: {0}', util.displayable_path(overlay_path)) config_path = config.user_config_path() if os.path.isfile(config_path): - log.debug(u'user configuration: {0}', + log.debug('user configuration: {0}', util.displayable_path(config_path)) else: - log.debug(u'no user configuration found at {0}', + log.debug('no user configuration found at {0}', util.displayable_path(config_path)) - log.debug(u'data directory: {0}', + log.debug('data directory: {0}', util.displayable_path(config.config_dir())) return config @@ -1193,13 +1219,14 @@ def _open_library(config): get_replacements(), ) lib.get_item(0) # Test database connection. 
- except (sqlite3.OperationalError, sqlite3.DatabaseError): - log.debug(u'{}', traceback.format_exc()) - raise UserError(u"database file {0} could not be opened".format( - util.displayable_path(dbpath) + except (sqlite3.OperationalError, sqlite3.DatabaseError) as db_error: + log.debug('{}', traceback.format_exc()) + raise UserError("database file {} cannot not be opened: {}".format( + util.displayable_path(dbpath), + db_error )) - log.debug(u'library database: {0}\n' - u'library directory: {1}', + log.debug('library database: {0}\n' + 'library directory: {1}', util.displayable_path(lib.path), util.displayable_path(lib.directory)) return lib @@ -1213,15 +1240,17 @@ def _raw_main(args, lib=None): parser.add_format_option(flags=('--format-item',), target=library.Item) parser.add_format_option(flags=('--format-album',), target=library.Album) parser.add_option('-l', '--library', dest='library', - help=u'library database file to use') + help='library database file to use') parser.add_option('-d', '--directory', dest='directory', - help=u"destination music directory") + help="destination music directory") parser.add_option('-v', '--verbose', dest='verbose', action='count', - help=u'log more details (use twice for even more)') + help='log more details (use twice for even more)') parser.add_option('-c', '--config', dest='config', - help=u'path to configuration file') + help='path to configuration file') + parser.add_option('-p', '--plugins', dest='plugins', + help='a comma-separated list of plugins to load') parser.add_option('-h', '--help', dest='help', action='store_true', - help=u'show this help message and exit') + help='show this help message and exit') parser.add_option('--version', dest='version', action='store_true', help=optparse.SUPPRESS_HELP) @@ -1256,7 +1285,7 @@ def main(args=None): _raw_main(args) except UserError as exc: message = exc.args[0] if exc.args else None - log.error(u'error: {0}', message) + log.error('error: {0}', message) sys.exit(1) except 
util.HumanReadableException as exc: exc.log(log) @@ -1267,13 +1296,13 @@ def main(args=None): log.debug('{}', traceback.format_exc()) log.error('{}', exc) sys.exit(1) - except confit.ConfigError as exc: - log.error(u'configuration error: {0}', exc) + except confuse.ConfigError as exc: + log.error('configuration error: {0}', exc) sys.exit(1) except db_query.InvalidQueryError as exc: - log.error(u'invalid query: {0}', exc) + log.error('invalid query: {0}', exc) sys.exit(1) - except IOError as exc: + except OSError as exc: if exc.errno == errno.EPIPE: # "Broken pipe". End silently. sys.stderr.close() @@ -1281,11 +1310,11 @@ def main(args=None): raise except KeyboardInterrupt: # Silently ignore ^C except in verbose mode. - log.debug(u'{}', traceback.format_exc()) + log.debug('{}', traceback.format_exc()) except db.DBAccessError as exc: log.error( - u'database access error: {0}\n' - u'the library file might have a permissions problem', + 'database access error: {0}\n' + 'the library file might have a permissions problem', exc ) sys.exit(1) diff --git a/libs/common/beets/ui/commands.py b/libs/common/beets/ui/commands.py index 46ae1d93..3a337401 100644 --- a/libs/common/beets/ui/commands.py +++ b/libs/common/beets/ui/commands.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ interface. """ -from __future__ import division, absolute_import, print_function import os import re @@ -39,11 +37,10 @@ from beets.util import syspath, normpath, ancestry, displayable_path, \ from beets import library from beets import config from beets import logging -from beets.util.confit import _package_path -import six + from . import _store_dict -VARIOUS_ARTISTS = u'Various Artists' +VARIOUS_ARTISTS = 'Various Artists' PromptChoice = namedtuple('PromptChoice', ['short', 'long', 'callback']) # Global logger. 
@@ -75,9 +72,9 @@ def _do_query(lib, query, album, also_items=True): items = list(lib.items(query)) if album and not albums: - raise ui.UserError(u'No matching albums found.') + raise ui.UserError('No matching albums found.') elif not album and not items: - raise ui.UserError(u'No matching items found.') + raise ui.UserError('No matching items found.') return items, albums @@ -89,33 +86,34 @@ def _print_keys(query): returned row, with indentation of 2 spaces. """ for row in query: - print_(u' ' * 2 + row['key']) + print_(' ' * 2 + row['key']) def fields_func(lib, opts, args): def _print_rows(names): names.sort() - print_(u' ' + u'\n '.join(names)) + print_(' ' + '\n '.join(names)) - print_(u"Item fields:") + print_("Item fields:") _print_rows(library.Item.all_keys()) - print_(u"Album fields:") + print_("Album fields:") _print_rows(library.Album.all_keys()) with lib.transaction() as tx: # The SQL uses the DISTINCT to get unique values from the query unique_fields = 'SELECT DISTINCT key FROM (%s)' - print_(u"Item flexible attributes:") + print_("Item flexible attributes:") _print_keys(tx.query(unique_fields % library.Item._flex_table)) - print_(u"Album flexible attributes:") + print_("Album flexible attributes:") _print_keys(tx.query(unique_fields % library.Album._flex_table)) + fields_cmd = ui.Subcommand( 'fields', - help=u'show fields available for queries and format strings' + help='show fields available for queries and format strings' ) fields_cmd.func = fields_func default_commands.append(fields_cmd) @@ -126,9 +124,9 @@ default_commands.append(fields_cmd) class HelpCommand(ui.Subcommand): def __init__(self): - super(HelpCommand, self).__init__( + super().__init__( 'help', aliases=('?',), - help=u'give detailed help on a specific sub-command', + help='give detailed help on a specific sub-command', ) def func(self, lib, opts, args): @@ -136,7 +134,7 @@ class HelpCommand(ui.Subcommand): cmdname = args[0] helpcommand = self.root_parser._subcommand_for_name(cmdname) 
if not helpcommand: - raise ui.UserError(u"unknown command '{0}'".format(cmdname)) + raise ui.UserError(f"unknown command '{cmdname}'") helpcommand.print_help() else: self.root_parser.print_help() @@ -161,29 +159,31 @@ def disambig_string(info): if isinstance(info, hooks.AlbumInfo): if info.media: if info.mediums and info.mediums > 1: - disambig.append(u'{0}x{1}'.format( + disambig.append('{}x{}'.format( info.mediums, info.media )) else: disambig.append(info.media) if info.year: - disambig.append(six.text_type(info.year)) + disambig.append(str(info.year)) if info.country: disambig.append(info.country) if info.label: disambig.append(info.label) + if info.catalognum: + disambig.append(info.catalognum) if info.albumdisambig: disambig.append(info.albumdisambig) if disambig: - return u', '.join(disambig) + return ', '.join(disambig) def dist_string(dist): """Formats a distance (a float) as a colorized similarity percentage string. """ - out = u'%.1f%%' % ((1 - dist) * 100) + out = '%.1f%%' % ((1 - dist) * 100) if dist <= config['match']['strong_rec_thresh'].as_number(): out = ui.colorize('text_success', out) elif dist <= config['match']['medium_rec_thresh'].as_number(): @@ -206,7 +206,7 @@ def penalty_string(distance, limit=None): if penalties: if limit and len(penalties) > limit: penalties = penalties[:limit] + ['...'] - return ui.colorize('text_warning', u'(%s)' % ', '.join(penalties)) + return ui.colorize('text_warning', '(%s)' % ', '.join(penalties)) def show_change(cur_artist, cur_album, match): @@ -216,11 +216,11 @@ def show_change(cur_artist, cur_album, match): """ def show_album(artist, album): if artist: - album_description = u' %s - %s' % (artist, album) + album_description = f' {artist} - {album}' elif album: - album_description = u' %s' % album + album_description = ' %s' % album else: - album_description = u' (unknown album)' + album_description = ' (unknown album)' print_(album_description) def format_index(track_info): @@ -238,40 +238,44 @@ def 
show_change(cur_artist, cur_album, match): mediums = track_info.disctotal if config['per_disc_numbering']: if mediums and mediums > 1: - return u'{0}-{1}'.format(medium, medium_index) + return f'{medium}-{medium_index}' else: - return six.text_type(medium_index or index) + return str(medium_index if medium_index is not None + else index) else: - return six.text_type(index) + return str(index) # Identify the album in question. if cur_artist != match.info.artist or \ (cur_album != match.info.album and match.info.album != VARIOUS_ARTISTS): artist_l, artist_r = cur_artist or '', match.info.artist - album_l, album_r = cur_album or '', match.info.album + album_l, album_r = cur_album or '', match.info.album if artist_r == VARIOUS_ARTISTS: # Hide artists for VA releases. - artist_l, artist_r = u'', u'' + artist_l, artist_r = '', '' + + if config['artist_credit']: + artist_r = match.info.artist_credit artist_l, artist_r = ui.colordiff(artist_l, artist_r) album_l, album_r = ui.colordiff(album_l, album_r) - print_(u"Correcting tags from:") + print_("Correcting tags from:") show_album(artist_l, album_l) - print_(u"To:") + print_("To:") show_album(artist_r, album_r) else: - print_(u"Tagging:\n {0.artist} - {0.album}".format(match.info)) + print_("Tagging:\n {0.artist} - {0.album}".format(match.info)) # Data URL. if match.info.data_url: - print_(u'URL:\n %s' % match.info.data_url) + print_('URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append(u'(Similarity: %s)' % dist_string(match.distance)) + info.append('(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -279,7 +283,7 @@ def show_change(cur_artist, cur_album, match): # Disambiguation. disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', '(%s)' % disambig)) print_(' '.join(info)) # Tracks. 
@@ -297,16 +301,16 @@ def show_change(cur_artist, cur_album, match): if medium != track_info.medium or disctitle != track_info.disctitle: media = match.info.media or 'Media' if match.info.mediums > 1 and track_info.disctitle: - lhs = u'%s %s: %s' % (media, track_info.medium, - track_info.disctitle) + lhs = '{} {}: {}'.format(media, track_info.medium, + track_info.disctitle) elif match.info.mediums > 1: - lhs = u'%s %s' % (media, track_info.medium) + lhs = f'{media} {track_info.medium}' elif track_info.disctitle: - lhs = u'%s: %s' % (media, track_info.disctitle) + lhs = f'{media}: {track_info.disctitle}' else: lhs = None if lhs: - lines.append((lhs, u'', 0)) + lines.append((lhs, '', 0)) medium, disctitle = track_info.medium, track_info.disctitle # Titles. @@ -327,7 +331,7 @@ def show_change(cur_artist, cur_album, match): color = 'text_highlight_minor' else: color = 'text_highlight' - templ = ui.colorize(color, u' (#{0})') + templ = ui.colorize(color, ' (#{0})') lhs += templ.format(cur_track) rhs += templ.format(new_track) lhs_width += len(cur_track) + 4 @@ -338,7 +342,7 @@ def show_change(cur_artist, cur_album, match): config['ui']['length_diff_thresh'].as_number(): cur_length = ui.human_seconds_short(item.length) new_length = ui.human_seconds_short(track_info.length) - templ = ui.colorize('text_highlight', u' ({0})') + templ = ui.colorize('text_highlight', ' ({0})') lhs += templ.format(cur_length) rhs += templ.format(new_length) lhs_width += len(cur_length) + 3 @@ -349,9 +353,9 @@ def show_change(cur_artist, cur_album, match): rhs += ' %s' % penalties if lhs != rhs: - lines.append((u' * %s' % lhs, rhs, lhs_width)) + lines.append((' * %s' % lhs, rhs, lhs_width)) elif config['import']['detail']: - lines.append((u' * %s' % lhs, '', lhs_width)) + lines.append((' * %s' % lhs, '', lhs_width)) # Print each track in two columns, or across two lines. 
col_width = (ui.term_width() - len(''.join([' * ', ' -> ']))) // 2 @@ -361,29 +365,36 @@ def show_change(cur_artist, cur_album, match): if not rhs: print_(lhs) elif max_width > col_width: - print_(u'%s ->\n %s' % (lhs, rhs)) + print_(f'{lhs} ->\n {rhs}') else: pad = max_width - lhs_width - print_(u'%s%s -> %s' % (lhs, ' ' * pad, rhs)) + print_('{}{} -> {}'.format(lhs, ' ' * pad, rhs)) # Missing and unmatched tracks. if match.extra_tracks: - print_(u'Missing tracks ({0}/{1} - {2:.1%}):'.format( + print_('Missing tracks ({}/{} - {:.1%}):'.format( len(match.extra_tracks), len(match.info.tracks), len(match.extra_tracks) / len(match.info.tracks) )) + pad_width = max(len(track_info.title) for track_info in + match.extra_tracks) for track_info in match.extra_tracks: - line = u' ! %s (#%s)' % (track_info.title, format_index(track_info)) + line = ' ! {0: <{width}} (#{1: >2})'.format(track_info.title, + format_index(track_info), + width=pad_width) if track_info.length: - line += u' (%s)' % ui.human_seconds_short(track_info.length) + line += ' (%s)' % ui.human_seconds_short(track_info.length) print_(ui.colorize('text_warning', line)) if match.extra_items: - print_(u'Unmatched tracks ({0}):'.format(len(match.extra_items))) + print_('Unmatched tracks ({}):'.format(len(match.extra_items))) + pad_width = max(len(item.title) for item in match.extra_items) for item in match.extra_items: - line = u' ! %s (#%s)' % (item.title, format_index(item)) + line = ' ! 
{0: <{width}} (#{1: >2})'.format(item.title, + format_index(item), + width=pad_width) if item.length: - line += u' (%s)' % ui.human_seconds_short(item.length) + line += ' (%s)' % ui.human_seconds_short(item.length) print_(ui.colorize('text_warning', line)) @@ -398,22 +409,22 @@ def show_item_change(item, match): cur_artist, new_artist = ui.colordiff(cur_artist, new_artist) cur_title, new_title = ui.colordiff(cur_title, new_title) - print_(u"Correcting track tags from:") - print_(u" %s - %s" % (cur_artist, cur_title)) - print_(u"To:") - print_(u" %s - %s" % (new_artist, new_title)) + print_("Correcting track tags from:") + print_(f" {cur_artist} - {cur_title}") + print_("To:") + print_(f" {new_artist} - {new_title}") else: - print_(u"Tagging track: %s - %s" % (cur_artist, cur_title)) + print_(f"Tagging track: {cur_artist} - {cur_title}") # Data URL. if match.info.data_url: - print_(u'URL:\n %s' % match.info.data_url) + print_('URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append(u'(Similarity: %s)' % dist_string(match.distance)) + info.append('(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -421,7 +432,7 @@ def show_item_change(item, match): # Disambiguation. 
disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', '(%s)' % disambig)) print_(' '.join(info)) @@ -435,7 +446,7 @@ def summarize_items(items, singleton): """ summary_parts = [] if not singleton: - summary_parts.append(u"{0} items".format(len(items))) + summary_parts.append("{} items".format(len(items))) format_counts = {} for item in items: @@ -449,26 +460,31 @@ def summarize_items(items, singleton): format_counts.items(), key=lambda fmt_and_count: (-fmt_and_count[1], fmt_and_count[0]) ): - summary_parts.append('{0} {1}'.format(fmt, count)) + summary_parts.append(f'{fmt} {count}') if items: average_bitrate = sum([item.bitrate for item in items]) / len(items) total_duration = sum([item.length for item in items]) total_filesize = sum([item.filesize for item in items]) - summary_parts.append(u'{0}kbps'.format(int(average_bitrate / 1000))) + summary_parts.append('{}kbps'.format(int(average_bitrate / 1000))) + if items[0].format == "FLAC": + sample_bits = '{}kHz/{} bit'.format( + round(int(items[0].samplerate) / 1000, 1), items[0].bitdepth) + summary_parts.append(sample_bits) summary_parts.append(ui.human_seconds_short(total_duration)) summary_parts.append(ui.human_bytes(total_filesize)) - return u', '.join(summary_parts) + return ', '.join(summary_parts) def _summary_judgment(rec): """Determines whether a decision should be made without even asking the user. This occurs in quiet mode and when an action is chosen for - NONE recommendations. Return an action or None if the user should be - queried. May also print to the console if a summary judgment is - made. + NONE recommendations. Return None if the user should be queried. + Otherwise, returns an action. May also print to the console if a + summary judgment is made. 
""" + if config['import']['quiet']: if rec == Recommendation.strong: return importer.action.APPLY @@ -477,21 +493,21 @@ def _summary_judgment(rec): 'skip': importer.action.SKIP, 'asis': importer.action.ASIS, }) - + elif config['import']['timid']: + return None elif rec == Recommendation.none: action = config['import']['none_rec_action'].as_choice({ 'skip': importer.action.SKIP, 'asis': importer.action.ASIS, 'ask': None, }) - else: return None if action == importer.action.SKIP: - print_(u'Skipping.') + print_('Skipping.') elif action == importer.action.ASIS: - print_(u'Importing as-is.') + print_('Importing as-is.') return action @@ -526,12 +542,12 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Zero candidates. if not candidates: if singleton: - print_(u"No matching recordings found.") + print_("No matching recordings found.") else: - print_(u"No matching release found for {0} tracks." + print_("No matching release found for {} tracks." .format(itemcount)) - print_(u'For help, see: ' - u'http://beets.readthedocs.org/en/latest/faq.html#nomatch') + print_('For help, see: ' + 'https://beets.readthedocs.org/en/latest/faq.html#nomatch') sel = ui.input_options(choice_opts) if sel in choice_actions: return choice_actions[sel] @@ -550,22 +566,22 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, if not bypass_candidates: # Display list of candidates. - print_(u'Finding tags for {0} "{1} - {2}".'.format( - u'track' if singleton else u'album', + print_('Finding tags for {} "{} - {}".'.format( + 'track' if singleton else 'album', item.artist if singleton else cur_artist, item.title if singleton else cur_album, )) - print_(u'Candidates:') + print_('Candidates:') for i, match in enumerate(candidates): # Index, metadata, and distance. 
line = [ - u'{0}.'.format(i + 1), - u'{0} - {1}'.format( + '{}.'.format(i + 1), + '{} - {}'.format( match.info.artist, match.info.title if singleton else match.info.album, ), - u'({0})'.format(dist_string(match.distance)), + '({})'.format(dist_string(match.distance)), ] # Penalties. @@ -577,14 +593,14 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('text_highlight_minor', - u'(%s)' % disambig)) + '(%s)' % disambig)) - print_(u' '.join(line)) + print_(' '.join(line)) # Ask the user for a choice. sel = ui.input_options(choice_opts, numrange=(1, len(candidates))) - if sel == u'm': + if sel == 'm': pass elif sel in choice_actions: return choice_actions[sel] @@ -608,19 +624,19 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Ask for confirmation. default = config['import']['default_action'].as_choice({ - u'apply': u'a', - u'skip': u's', - u'asis': u'u', - u'none': None, + 'apply': 'a', + 'skip': 's', + 'asis': 'u', + 'none': None, }) if default is None: require = True # Bell ring when user interaction is needed. if config['import']['bell']: - ui.print_(u'\a', end=u'') - sel = ui.input_options((u'Apply', u'More candidates') + choice_opts, + ui.print_('\a', end='') + sel = ui.input_options(('Apply', 'More candidates') + choice_opts, require=require, default=default) - if sel == u'a': + if sel == 'a': return match elif sel in choice_actions: return choice_actions[sel] @@ -632,8 +648,8 @@ def manual_search(session, task): Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. 
""" - artist = input_(u'Artist:').strip() - name = input_(u'Album:' if task.is_album else u'Track:').strip() + artist = input_('Artist:').strip() + name = input_('Album:' if task.is_album else 'Track:').strip() if task.is_album: _, _, prop = autotag.tag_album( @@ -649,8 +665,8 @@ def manual_id(session, task): Input an ID, either for an album ("release") or a track ("recording"). """ - prompt = u'Enter {0} ID:'.format(u'release' if task.is_album - else u'recording') + prompt = 'Enter {} ID:'.format('release' if task.is_album + else 'recording') search_id = input_(prompt).strip() if task.is_album: @@ -671,6 +687,7 @@ def abort_action(session, task): class TerminalImportSession(importer.ImportSession): """An import session that runs in a terminal. """ + def choose_match(self, task): """Given an initial autotagging of items, go through an interactive dance with the user to ask for a choice of metadata. Returns an @@ -678,8 +695,21 @@ class TerminalImportSession(importer.ImportSession): """ # Show what we're tagging. print_() - print_(displayable_path(task.paths, u'\n') + - u' ({0} items)'.format(len(task.items))) + print_(displayable_path(task.paths, '\n') + + ' ({} items)'.format(len(task.items))) + + # Let plugins display info or prompt the user before we go through the + # process of selecting candidate. + results = plugins.send('import_task_before_choice', + session=self, task=task) + actions = [action for action in results if action] + + if len(actions) == 1: + return actions[0] + elif len(actions) > 1: + raise plugins.PluginConflictException( + 'Only one handler for `import_task_before_choice` may return ' + 'an action.') # Take immediate action if appropriate. action = _summary_judgment(task.rec) @@ -768,48 +798,48 @@ class TerminalImportSession(importer.ImportSession): """Decide what to do when a new album or item seems similar to one that's already in the library. 
""" - log.warning(u"This {0} is already in the library!", - (u"album" if task.is_album else u"item")) + log.warning("This {0} is already in the library!", + ("album" if task.is_album else "item")) if config['import']['quiet']: # In quiet mode, don't prompt -- just skip. - log.info(u'Skipping.') - sel = u's' + log.info('Skipping.') + sel = 's' else: # Print some detail about the existing and new items so the # user can make an informed decision. for duplicate in found_duplicates: - print_(u"Old: " + summarize_items( + print_("Old: " + summarize_items( list(duplicate.items()) if task.is_album else [duplicate], not task.is_album, )) - print_(u"New: " + summarize_items( + print_("New: " + summarize_items( task.imported_items(), not task.is_album, )) sel = ui.input_options( - (u'Skip new', u'Keep both', u'Remove old', u'Merge all') + ('Skip new', 'Keep all', 'Remove old', 'Merge all') ) - if sel == u's': + if sel == 's': # Skip new. task.set_choice(importer.action.SKIP) - elif sel == u'k': + elif sel == 'k': # Keep both. Do nothing; leave the choice intact. pass - elif sel == u'r': + elif sel == 'r': # Remove old. task.should_remove_duplicates = True - elif sel == u'm': + elif sel == 'm': task.should_merge_duplicates = True else: assert False def should_resume(self, path): - return ui.input_yn(u"Import of the directory:\n{0}\n" - u"was interrupted. Resume (Y/n)?" + return ui.input_yn("Import of the directory:\n{}\n" + "was interrupted. Resume (Y/n)?" .format(displayable_path(path))) def _get_choices(self, task): @@ -830,22 +860,22 @@ class TerminalImportSession(importer.ImportSession): """ # Standard, built-in choices. 
choices = [ - PromptChoice(u's', u'Skip', + PromptChoice('s', 'Skip', lambda s, t: importer.action.SKIP), - PromptChoice(u'u', u'Use as-is', + PromptChoice('u', 'Use as-is', lambda s, t: importer.action.ASIS) ] if task.is_album: choices += [ - PromptChoice(u't', u'as Tracks', + PromptChoice('t', 'as Tracks', lambda s, t: importer.action.TRACKS), - PromptChoice(u'g', u'Group albums', + PromptChoice('g', 'Group albums', lambda s, t: importer.action.ALBUMS), ] choices += [ - PromptChoice(u'e', u'Enter search', manual_search), - PromptChoice(u'i', u'enter Id', manual_id), - PromptChoice(u'b', u'aBort', abort_action), + PromptChoice('e', 'Enter search', manual_search), + PromptChoice('i', 'enter Id', manual_id), + PromptChoice('b', 'aBort', abort_action), ] # Send the before_choose_candidate event and flatten list. @@ -855,7 +885,7 @@ class TerminalImportSession(importer.ImportSession): # Add a "dummy" choice for the other baked-in option, for # duplicate checking. all_choices = [ - PromptChoice(u'a', u'Apply', None), + PromptChoice('a', 'Apply', None), ] + choices + extra_choices # Check for conflicts. @@ -868,8 +898,8 @@ class TerminalImportSession(importer.ImportSession): # Keep the first of the choices, removing the rest. dup_choices = [c for c in all_choices if c.short == short] for c in dup_choices[1:]: - log.warning(u"Prompt choice '{0}' removed due to conflict " - u"with '{1}' (short letter: '{2}')", + log.warning("Prompt choice '{0}' removed due to conflict " + "with '{1}' (short letter: '{2}')", c.long, dup_choices[0].long, c.short) extra_choices.remove(c) @@ -886,21 +916,21 @@ def import_files(lib, paths, query): # Check the user-specified directories. for path in paths: if not os.path.exists(syspath(normpath(path))): - raise ui.UserError(u'no such file or directory: {0}'.format( + raise ui.UserError('no such file or directory: {}'.format( displayable_path(path))) # Check parameter consistency. 
if config['import']['quiet'] and config['import']['timid']: - raise ui.UserError(u"can't be both quiet and timid") + raise ui.UserError("can't be both quiet and timid") # Open the log. if config['import']['log'].get() is not None: logpath = syspath(config['import']['log'].as_filename()) try: loghandler = logging.FileHandler(logpath) - except IOError: - raise ui.UserError(u"could not open log file for writing: " - u"{0}".format(displayable_path(logpath))) + except OSError: + raise ui.UserError("could not open log file for writing: " + "{}".format(displayable_path(logpath))) else: loghandler = None @@ -931,111 +961,111 @@ def import_func(lib, opts, args): query = None paths = args if not paths: - raise ui.UserError(u'no path specified') + raise ui.UserError('no path specified') - # On Python 2, we get filenames as raw bytes, which is what we - # need. On Python 3, we need to undo the "helpful" conversion to - # Unicode strings to get the real bytestring filename. - if not six.PY2: - paths = [p.encode(util.arg_encoding(), 'surrogateescape') - for p in paths] + # On Python 2, we used to get filenames as raw bytes, which is + # what we need. On Python 3, we need to undo the "helpful" + # conversion to Unicode strings to get the real bytestring + # filename. 
+ paths = [p.encode(util.arg_encoding(), 'surrogateescape') + for p in paths] import_files(lib, paths, query) import_cmd = ui.Subcommand( - u'import', help=u'import new music', aliases=(u'imp', u'im') + 'import', help='import new music', aliases=('imp', 'im') ) import_cmd.parser.add_option( - u'-c', u'--copy', action='store_true', default=None, - help=u"copy tracks into library directory (default)" + '-c', '--copy', action='store_true', default=None, + help="copy tracks into library directory (default)" ) import_cmd.parser.add_option( - u'-C', u'--nocopy', action='store_false', dest='copy', - help=u"don't copy tracks (opposite of -c)" + '-C', '--nocopy', action='store_false', dest='copy', + help="don't copy tracks (opposite of -c)" ) import_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move tracks into the library (overrides -c)" + '-m', '--move', action='store_true', dest='move', + help="move tracks into the library (overrides -c)" ) import_cmd.parser.add_option( - u'-w', u'--write', action='store_true', default=None, - help=u"write new metadata to files' tags (default)" + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" ) import_cmd.parser.add_option( - u'-W', u'--nowrite', action='store_false', dest='write', - help=u"don't write metadata (opposite of -w)" + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" ) import_cmd.parser.add_option( - u'-a', u'--autotag', action='store_true', dest='autotag', - help=u"infer tags for imported files (default)" + '-a', '--autotag', action='store_true', dest='autotag', + help="infer tags for imported files (default)" ) import_cmd.parser.add_option( - u'-A', u'--noautotag', action='store_false', dest='autotag', - help=u"don't infer tags for imported files (opposite of -a)" + '-A', '--noautotag', action='store_false', dest='autotag', + help="don't infer tags for imported files 
(opposite of -a)" ) import_cmd.parser.add_option( - u'-p', u'--resume', action='store_true', default=None, - help=u"resume importing if interrupted" + '-p', '--resume', action='store_true', default=None, + help="resume importing if interrupted" ) import_cmd.parser.add_option( - u'-P', u'--noresume', action='store_false', dest='resume', - help=u"do not try to resume importing" + '-P', '--noresume', action='store_false', dest='resume', + help="do not try to resume importing" ) import_cmd.parser.add_option( - u'-q', u'--quiet', action='store_true', dest='quiet', - help=u"never prompt for input: skip albums instead" + '-q', '--quiet', action='store_true', dest='quiet', + help="never prompt for input: skip albums instead" ) import_cmd.parser.add_option( - u'-l', u'--log', dest='log', - help=u'file to log untaggable albums for later review' + '-l', '--log', dest='log', + help='file to log untaggable albums for later review' ) import_cmd.parser.add_option( - u'-s', u'--singletons', action='store_true', - help=u'import individual tracks instead of full albums' + '-s', '--singletons', action='store_true', + help='import individual tracks instead of full albums' ) import_cmd.parser.add_option( - u'-t', u'--timid', dest='timid', action='store_true', - help=u'always confirm all actions' + '-t', '--timid', dest='timid', action='store_true', + help='always confirm all actions' ) import_cmd.parser.add_option( - u'-L', u'--library', dest='library', action='store_true', - help=u'retag items matching a query' + '-L', '--library', dest='library', action='store_true', + help='retag items matching a query' ) import_cmd.parser.add_option( - u'-i', u'--incremental', dest='incremental', action='store_true', - help=u'skip already-imported directories' + '-i', '--incremental', dest='incremental', action='store_true', + help='skip already-imported directories' ) import_cmd.parser.add_option( - u'-I', u'--noincremental', dest='incremental', action='store_false', - help=u'do not skip 
already-imported directories' + '-I', '--noincremental', dest='incremental', action='store_false', + help='do not skip already-imported directories' ) import_cmd.parser.add_option( - u'--from-scratch', dest='from_scratch', action='store_true', - help=u'erase existing metadata before applying new metadata' + '--from-scratch', dest='from_scratch', action='store_true', + help='erase existing metadata before applying new metadata' ) import_cmd.parser.add_option( - u'--flat', dest='flat', action='store_true', - help=u'import an entire tree as a single album' + '--flat', dest='flat', action='store_true', + help='import an entire tree as a single album' ) import_cmd.parser.add_option( - u'-g', u'--group-albums', dest='group_albums', action='store_true', - help=u'group tracks in a folder into separate albums' + '-g', '--group-albums', dest='group_albums', action='store_true', + help='group tracks in a folder into separate albums' ) import_cmd.parser.add_option( - u'--pretend', dest='pretend', action='store_true', - help=u'just print the files to import' + '--pretend', dest='pretend', action='store_true', + help='just print the files to import' ) import_cmd.parser.add_option( - u'-S', u'--search-id', dest='search_ids', action='append', + '-S', '--search-id', dest='search_ids', action='append', metavar='ID', - help=u'restrict matching to a specific metadata backend ID' + help='restrict matching to a specific metadata backend ID' ) import_cmd.parser.add_option( - u'--set', dest='set_fields', action='callback', + '--set', dest='set_fields', action='callback', callback=_store_dict, metavar='FIELD=VALUE', - help=u'set the given fields to the supplied values' + help='set the given fields to the supplied values' ) import_cmd.func = import_func default_commands.append(import_cmd) @@ -1043,7 +1073,7 @@ default_commands.append(import_cmd) # list: Query and show library contents. 
-def list_items(lib, query, album, fmt=u''): +def list_items(lib, query, album, fmt=''): """Print out items in lib matching query. If album, then search for albums instead of single items. """ @@ -1059,9 +1089,9 @@ def list_func(lib, opts, args): list_items(lib, decargs(args), opts.album) -list_cmd = ui.Subcommand(u'list', help=u'query the library', aliases=(u'ls',)) -list_cmd.parser.usage += u"\n" \ - u'Example: %prog -f \'$album: $title\' artist:beatles' +list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) +list_cmd.parser.usage += "\n" \ + 'Example: %prog -f \'$album: $title\' artist:beatles' list_cmd.parser.add_all_common_options() list_cmd.func = list_func default_commands.append(list_cmd) @@ -1089,7 +1119,7 @@ def update_items(lib, query, album, move, pretend, fields): # Item deleted? if not os.path.exists(syspath(item.path)): ui.print_(format(item)) - ui.print_(ui.colorize('text_error', u' deleted')) + ui.print_(ui.colorize('text_error', ' deleted')) if not pretend: item.remove(True) affected_albums.add(item.album_id) @@ -1097,7 +1127,7 @@ def update_items(lib, query, album, move, pretend, fields): # Did the item change since last checked? if item.current_mtime() <= item.mtime: - log.debug(u'skipping {0} because mtime is up to date ({1})', + log.debug('skipping {0} because mtime is up to date ({1})', displayable_path(item.path), item.mtime) continue @@ -1105,7 +1135,7 @@ def update_items(lib, query, album, move, pretend, fields): try: item.read() except library.ReadError as exc: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(item.path), exc) continue @@ -1116,7 +1146,7 @@ def update_items(lib, query, album, move, pretend, fields): old_item = lib.get_item(item.id) if old_item.albumartist == old_item.artist == item.artist: item.albumartist = old_item.albumartist - item._dirty.discard(u'albumartist') + item._dirty.discard('albumartist') # Check for and display changes. 
changed = ui.show_model_changes( @@ -1149,7 +1179,7 @@ def update_items(lib, query, album, move, pretend, fields): continue album = lib.get_album(album_id) if not album: # Empty albums have already been removed. - log.debug(u'emptied album {0}', album_id) + log.debug('emptied album {0}', album_id) continue first_item = album.items().get() @@ -1160,42 +1190,48 @@ def update_items(lib, query, album, move, pretend, fields): # Move album art (and any inconsistent items). if move and lib.directory in ancestry(first_item.path): - log.debug(u'moving album {0}', album_id) + log.debug('moving album {0}', album_id) # Manually moving and storing the album. items = list(album.items()) for item in items: - item.move(store=False) + item.move(store=False, with_album=False) item.store(fields=fields) album.move(store=False) album.store(fields=fields) def update_func(lib, opts, args): + # Verify that the library folder exists to prevent accidental wipes. + if not os.path.isdir(lib.directory): + ui.print_("Library path is unavailable or does not exist.") + ui.print_(lib.directory) + if not ui.input_yn("Are you sure you want to continue (y/n)?", True): + return update_items(lib, decargs(args), opts.album, ui.should_move(opts.move), opts.pretend, opts.fields) update_cmd = ui.Subcommand( - u'update', help=u'update the library', aliases=(u'upd', u'up',) + 'update', help='update the library', aliases=('upd', 'up',) ) update_cmd.parser.add_album_option() update_cmd.parser.add_format_option() update_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move files in the library directory" + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory" ) update_cmd.parser.add_option( - u'-M', u'--nomove', action='store_false', dest='move', - help=u"don't move files in library" + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library" ) update_cmd.parser.add_option( - u'-p', u'--pretend', 
action='store_true', - help=u"show all changes but do nothing" + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" ) update_cmd.parser.add_option( - u'-F', u'--field', default=None, action='append', dest='fields', - help=u'list of fields to update' + '-F', '--field', default=None, action='append', dest='fields', + help='list of fields to update' ) update_cmd.func = update_func default_commands.append(update_cmd) @@ -1209,31 +1245,53 @@ def remove_items(lib, query, album, delete, force): """ # Get the matching items. items, albums = _do_query(lib, query, album) + objs = albums if album else items # Confirm file removal if not forcing removal. if not force: # Prepare confirmation with user. - print_() + album_str = " in {} album{}".format( + len(albums), 's' if len(albums) > 1 else '' + ) if album else "" + if delete: - fmt = u'$path - $title' - prompt = u'Really DELETE %i file%s (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') + fmt = '$path - $title' + prompt = 'Really DELETE' + prompt_all = 'Really DELETE {} file{}{}'.format( + len(items), 's' if len(items) > 1 else '', album_str + ) else: - fmt = u'' - prompt = u'Really remove %i item%s from the library (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') + fmt = '' + prompt = 'Really remove from the library?' + prompt_all = 'Really remove {} item{}{} from the library?'.format( + len(items), 's' if len(items) > 1 else '', album_str + ) + + # Helpers for printing affected items + def fmt_track(t): + ui.print_(format(t, fmt)) + + def fmt_album(a): + ui.print_() + for i in a.items(): + fmt_track(i) + + fmt_obj = fmt_album if album else fmt_track # Show all the items. - for item in items: - ui.print_(format(item, fmt)) + for o in objs: + fmt_obj(o) # Confirm with user. - if not ui.input_yn(prompt, True): - return + objs = ui.input_select_objects(prompt, objs, fmt_obj, + prompt_all=prompt_all) + + if not objs: + return # Remove (and possibly delete) items. 
with lib.transaction(): - for obj in (albums if album else items): + for obj in objs: obj.remove(delete) @@ -1242,15 +1300,15 @@ def remove_func(lib, opts, args): remove_cmd = ui.Subcommand( - u'remove', help=u'remove matching items from the library', aliases=(u'rm',) + 'remove', help='remove matching items from the library', aliases=('rm',) ) remove_cmd.parser.add_option( - u"-d", u"--delete", action="store_true", - help=u"also remove files from disk" + "-d", "--delete", action="store_true", + help="also remove files from disk" ) remove_cmd.parser.add_option( - u"-f", u"--force", action="store_true", - help=u"do not ask when removing items" + "-f", "--force", action="store_true", + help="do not ask when removing items" ) remove_cmd.parser.add_album_option() remove_cmd.func = remove_func @@ -1275,7 +1333,7 @@ def show_stats(lib, query, exact): try: total_size += os.path.getsize(syspath(item.path)) except OSError as exc: - log.info(u'could not get size of {}: {}', item.path, exc) + log.info('could not get size of {}: {}', item.path, exc) else: total_size += int(item.length * item.bitrate / 8) total_time += item.length @@ -1285,20 +1343,20 @@ def show_stats(lib, query, exact): if item.album_id: albums.add(item.album_id) - size_str = u'' + ui.human_bytes(total_size) + size_str = '' + ui.human_bytes(total_size) if exact: - size_str += u' ({0} bytes)'.format(total_size) + size_str += f' ({total_size} bytes)' - print_(u"""Tracks: {0} -Total time: {1}{2} -{3}: {4} -Artists: {5} -Albums: {6} -Album artists: {7}""".format( + print_("""Tracks: {} +Total time: {}{} +{}: {} +Artists: {} +Albums: {} +Album artists: {}""".format( total_items, ui.human_seconds(total_time), - u' ({0:.2f} seconds)'.format(total_time) if exact else '', - u'Total size' if exact else u'Approximate total size', + f' ({total_time:.2f} seconds)' if exact else '', + 'Total size' if exact else 'Approximate total size', size_str, len(artists), len(albums), @@ -1311,11 +1369,11 @@ def stats_func(lib, opts, 
args): stats_cmd = ui.Subcommand( - u'stats', help=u'show statistics about the library or a query' + 'stats', help='show statistics about the library or a query' ) stats_cmd.parser.add_option( - u'-e', u'--exact', action='store_true', - help=u'exact size and time' + '-e', '--exact', action='store_true', + help='exact size and time' ) stats_cmd.func = stats_func default_commands.append(stats_cmd) @@ -1324,18 +1382,18 @@ default_commands.append(stats_cmd) # version: Show current beets version. def show_version(lib, opts, args): - print_(u'beets version %s' % beets.__version__) - print_(u'Python version {}'.format(python_version())) + print_('beets version %s' % beets.__version__) + print_(f'Python version {python_version()}') # Show plugins. names = sorted(p.name for p in plugins.find_plugins()) if names: - print_(u'plugins:', ', '.join(names)) + print_('plugins:', ', '.join(names)) else: - print_(u'no plugins loaded') + print_('no plugins loaded') version_cmd = ui.Subcommand( - u'version', help=u'output version information' + 'version', help='output version information' ) version_cmd.func = show_version default_commands.append(version_cmd) @@ -1362,31 +1420,31 @@ def modify_items(lib, mods, dels, query, write, move, album, confirm): # Apply changes *temporarily*, preview them, and collect modified # objects. - print_(u'Modifying {0} {1}s.' - .format(len(objs), u'album' if album else u'item')) - changed = set() + print_('Modifying {} {}s.' + .format(len(objs), 'album' if album else 'item')) + changed = [] for obj in objs: - if print_and_modify(obj, mods, dels): - changed.add(obj) + if print_and_modify(obj, mods, dels) and obj not in changed: + changed.append(obj) # Still something to do? if not changed: - print_(u'No changes to make.') + print_('No changes to make.') return # Confirm action. 
if confirm: if write and move: - extra = u', move and write tags' + extra = ', move and write tags' elif write: - extra = u' and write tags' + extra = ' and write tags' elif move: - extra = u' and move' + extra = ' and move' else: - extra = u'' + extra = '' changed = ui.input_select_objects( - u'Really modify%s' % extra, changed, + 'Really modify%s' % extra, changed, lambda o: print_and_modify(o, mods, dels) ) @@ -1434,35 +1492,35 @@ def modify_parse_args(args): def modify_func(lib, opts, args): query, mods, dels = modify_parse_args(decargs(args)) if not mods and not dels: - raise ui.UserError(u'no modifications specified') + raise ui.UserError('no modifications specified') modify_items(lib, mods, dels, query, ui.should_write(opts.write), ui.should_move(opts.move), opts.album, not opts.yes) modify_cmd = ui.Subcommand( - u'modify', help=u'change metadata fields', aliases=(u'mod',) + 'modify', help='change metadata fields', aliases=('mod',) ) modify_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move files in the library directory" + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory" ) modify_cmd.parser.add_option( - u'-M', u'--nomove', action='store_false', dest='move', - help=u"don't move files in library" + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library" ) modify_cmd.parser.add_option( - u'-w', u'--write', action='store_true', default=None, - help=u"write new metadata to files' tags (default)" + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" ) modify_cmd.parser.add_option( - u'-W', u'--nowrite', action='store_false', dest='write', - help=u"don't write metadata (opposite of -w)" + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" ) modify_cmd.parser.add_album_option() modify_cmd.parser.add_format_option(target='item') 
modify_cmd.parser.add_option( - u'-y', u'--yes', action='store_true', - help=u'skip confirmation' + '-y', '--yes', action='store_true', + help='skip confirmation' ) modify_cmd.func = modify_func default_commands.append(modify_cmd) @@ -1478,18 +1536,28 @@ def move_items(lib, dest, query, copy, album, pretend, confirm=False, """ items, albums = _do_query(lib, query, album, False) objs = albums if album else items + num_objs = len(objs) # Filter out files that don't need to be moved. - isitemmoved = lambda item: item.path != item.destination(basedir=dest) - isalbummoved = lambda album: any(isitemmoved(i) for i in album.items()) + def isitemmoved(item): + return item.path != item.destination(basedir=dest) + + def isalbummoved(album): + return any(isitemmoved(i) for i in album.items()) + objs = [o for o in objs if (isalbummoved if album else isitemmoved)(o)] + num_unmoved = num_objs - len(objs) + # Report unmoved files that match the query. + unmoved_msg = '' + if num_unmoved > 0: + unmoved_msg = f' ({num_unmoved} already in place)' copy = copy or export # Exporting always copies. 
- action = u'Copying' if copy else u'Moving' - act = u'copy' if copy else u'move' - entity = u'album' if album else u'item' - log.info(u'{0} {1} {2}{3}.', action, len(objs), entity, - u's' if len(objs) != 1 else u'') + action = 'Copying' if copy else 'Moving' + act = 'copy' if copy else 'move' + entity = 'album' if album else 'item' + log.info('{0} {1} {2}{3}{4}.', action, len(objs), entity, + 's' if len(objs) != 1 else '', unmoved_msg) if not objs: return @@ -1503,12 +1571,12 @@ def move_items(lib, dest, query, copy, album, pretend, confirm=False, else: if confirm: objs = ui.input_select_objects( - u'Really %s' % act, objs, + 'Really %s' % act, objs, lambda o: show_path_changes( [(o.path, o.destination(basedir=dest))])) for obj in objs: - log.debug(u'moving: {0}', util.displayable_path(obj.path)) + log.debug('moving: {0}', util.displayable_path(obj.path)) if export: # Copy without affecting the database. @@ -1527,34 +1595,34 @@ def move_func(lib, opts, args): if dest is not None: dest = normpath(dest) if not os.path.isdir(dest): - raise ui.UserError(u'no such directory: %s' % dest) + raise ui.UserError('no such directory: %s' % dest) move_items(lib, dest, decargs(args), opts.copy, opts.album, opts.pretend, opts.timid, opts.export) move_cmd = ui.Subcommand( - u'move', help=u'move or copy items', aliases=(u'mv',) + 'move', help='move or copy items', aliases=('mv',) ) move_cmd.parser.add_option( - u'-d', u'--dest', metavar='DIR', dest='dest', - help=u'destination directory' + '-d', '--dest', metavar='DIR', dest='dest', + help='destination directory' ) move_cmd.parser.add_option( - u'-c', u'--copy', default=False, action='store_true', - help=u'copy instead of moving' + '-c', '--copy', default=False, action='store_true', + help='copy instead of moving' ) move_cmd.parser.add_option( - u'-p', u'--pretend', default=False, action='store_true', - help=u'show how files would be moved, but don\'t touch anything' + '-p', '--pretend', default=False, action='store_true', + 
help='show how files would be moved, but don\'t touch anything' ) move_cmd.parser.add_option( - u'-t', u'--timid', dest='timid', action='store_true', - help=u'always confirm all actions' + '-t', '--timid', dest='timid', action='store_true', + help='always confirm all actions' ) move_cmd.parser.add_option( - u'-e', u'--export', default=False, action='store_true', - help=u'copy without changing the database path' + '-e', '--export', default=False, action='store_true', + help='copy without changing the database path' ) move_cmd.parser.add_album_option() move_cmd.func = move_func @@ -1572,14 +1640,14 @@ def write_items(lib, query, pretend, force): for item in items: # Item deleted? if not os.path.exists(syspath(item.path)): - log.info(u'missing file: {0}', util.displayable_path(item.path)) + log.info('missing file: {0}', util.displayable_path(item.path)) continue # Get an Item object reflecting the "clean" (on-disk) state. try: clean_item = library.Item.from_path(item.path) except library.ReadError as exc: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(item.path), exc) continue @@ -1596,14 +1664,14 @@ def write_func(lib, opts, args): write_items(lib, decargs(args), opts.pretend, opts.force) -write_cmd = ui.Subcommand(u'write', help=u'write tag information to files') +write_cmd = ui.Subcommand('write', help='write tag information to files') write_cmd.parser.add_option( - u'-p', u'--pretend', action='store_true', - help=u"show all changes but do nothing" + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" ) write_cmd.parser.add_option( - u'-f', u'--force', action='store_true', - help=u"write tags even if the existing tags match the database" + '-f', '--force', action='store_true', + help="write tags even if the existing tags match the database" ) write_cmd.func = write_func default_commands.append(write_cmd) @@ -1640,7 +1708,10 @@ def config_func(lib, opts, args): # Dump configuration. 
else: config_out = config.dump(full=opts.defaults, redact=opts.redact) - print_(util.text_string(config_out)) + if config_out.strip() != '{}': + print_(util.text_string(config_out)) + else: + print("Empty configuration") def config_edit(): @@ -1654,29 +1725,30 @@ def config_edit(): open(path, 'w+').close() util.interactive_open([path], editor) except OSError as exc: - message = u"Could not edit configuration: {0}".format(exc) + message = f"Could not edit configuration: {exc}" if not editor: - message += u". Please set the EDITOR environment variable" + message += ". Please set the EDITOR environment variable" raise ui.UserError(message) -config_cmd = ui.Subcommand(u'config', - help=u'show or edit the user configuration') + +config_cmd = ui.Subcommand('config', + help='show or edit the user configuration') config_cmd.parser.add_option( - u'-p', u'--paths', action='store_true', - help=u'show files that configuration was loaded from' + '-p', '--paths', action='store_true', + help='show files that configuration was loaded from' ) config_cmd.parser.add_option( - u'-e', u'--edit', action='store_true', - help=u'edit user configuration with $EDITOR' + '-e', '--edit', action='store_true', + help='edit user configuration with $EDITOR' ) config_cmd.parser.add_option( - u'-d', u'--defaults', action='store_true', - help=u'include the default configuration' + '-d', '--defaults', action='store_true', + help='include the default configuration' ) config_cmd.parser.add_option( - u'-c', u'--clear', action='store_false', + '-c', '--clear', action='store_false', dest='redact', default=True, - help=u'do not redact sensitive fields' + help='do not redact sensitive fields' ) config_cmd.func = config_func default_commands.append(config_cmd) @@ -1686,19 +1758,20 @@ default_commands.append(config_cmd) def print_completion(*args): for line in completion_script(default_commands + plugins.commands()): - print_(line, end=u'') + print_(line, end='') if not any(map(os.path.isfile, 
BASH_COMPLETION_PATHS)): - log.warning(u'Warning: Unable to find the bash-completion package. ' - u'Command line completion might not work.') + log.warning('Warning: Unable to find the bash-completion package. ' + 'Command line completion might not work.') + BASH_COMPLETION_PATHS = map(syspath, [ - u'/etc/bash_completion', - u'/usr/share/bash-completion/bash_completion', - u'/usr/local/share/bash-completion/bash_completion', + '/etc/bash_completion', + '/usr/share/bash-completion/bash_completion', + '/usr/local/share/bash-completion/bash_completion', # SmartOS - u'/opt/local/share/bash-completion/bash_completion', + '/opt/local/share/bash-completion/bash_completion', # Homebrew (before bash-completion2) - u'/usr/local/etc/bash_completion', + '/usr/local/etc/bash_completion', ]) @@ -1708,8 +1781,8 @@ def completion_script(commands): ``commands`` is alist of ``ui.Subcommand`` instances to generate completion data for. """ - base_script = os.path.join(_package_path('beets.ui'), 'completion_base.sh') - with open(base_script, 'r') as base_script: + base_script = os.path.join(os.path.dirname(__file__), 'completion_base.sh') + with open(base_script) as base_script: yield util.text_string(base_script.read()) options = {} @@ -1725,12 +1798,12 @@ def completion_script(commands): if re.match(r'^\w+$', alias): aliases[alias] = name - options[name] = {u'flags': [], u'opts': []} + options[name] = {'flags': [], 'opts': []} for opts in cmd.parser._get_all_options()[1:]: if opts.action in ('store_true', 'store_false'): - option_type = u'flags' + option_type = 'flags' else: - option_type = u'opts' + option_type = 'opts' options[name][option_type].extend( opts._short_opts + opts._long_opts @@ -1738,31 +1811,31 @@ def completion_script(commands): # Add global options options['_global'] = { - u'flags': [u'-v', u'--verbose'], - u'opts': - u'-l --library -c --config -d --directory -h --help'.split(u' ') + 'flags': ['-v', '--verbose'], + 'opts': + '-l --library -c --config -d --directory 
-h --help'.split(' ') } # Add flags common to all commands options['_common'] = { - u'flags': [u'-h', u'--help'] + 'flags': ['-h', '--help'] } # Start generating the script - yield u"_beet() {\n" + yield "_beet() {\n" # Command names - yield u" local commands='%s'\n" % ' '.join(command_names) - yield u"\n" + yield " local commands='%s'\n" % ' '.join(command_names) + yield "\n" # Command aliases - yield u" local aliases='%s'\n" % ' '.join(aliases.keys()) + yield " local aliases='%s'\n" % ' '.join(aliases.keys()) for alias, cmd in aliases.items(): - yield u" local alias__%s=%s\n" % (alias.replace('-', '_'), cmd) - yield u'\n' + yield " local alias__{}={}\n".format(alias.replace('-', '_'), cmd) + yield '\n' # Fields - yield u" fields='%s'\n" % ' '.join( + yield " fields='%s'\n" % ' '.join( set( list(library.Item._fields.keys()) + list(library.Album._fields.keys()) @@ -1773,17 +1846,17 @@ def completion_script(commands): for cmd, opts in options.items(): for option_type, option_list in opts.items(): if option_list: - option_list = u' '.join(option_list) - yield u" local %s__%s='%s'\n" % ( + option_list = ' '.join(option_list) + yield " local {}__{}='{}'\n".format( option_type, cmd.replace('-', '_'), option_list) - yield u' _beet_dispatch\n' - yield u'}\n' + yield ' _beet_dispatch\n' + yield '}\n' completion_cmd = ui.Subcommand( 'completion', - help=u'print shell script that provides command line completion' + help='print shell script that provides command line completion' ) completion_cmd.func = print_completion completion_cmd.hide = True diff --git a/libs/common/beets/util/__init__.py b/libs/common/beets/util/__init__.py index 69870edf..d58bb28e 100644 --- a/libs/common/beets/util/__init__.py +++ b/libs/common/beets/util/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. 
# @@ -15,28 +14,28 @@ """Miscellaneous utility functions.""" -from __future__ import division, absolute_import, print_function import os import sys import errno import locale import re +import tempfile import shutil import fnmatch -from collections import Counter +import functools +from collections import Counter, namedtuple +from multiprocessing.pool import ThreadPool import traceback import subprocess import platform import shlex from beets.util import hidden -import six from unidecode import unidecode from enum import Enum MAX_FILENAME_LENGTH = 200 -WINDOWS_MAGIC_PREFIX = u'\\\\?\\' -SNI_SUPPORTED = sys.version_info >= (2, 7, 9) +WINDOWS_MAGIC_PREFIX = '\\\\?\\' class HumanReadableException(Exception): @@ -58,27 +57,27 @@ class HumanReadableException(Exception): self.reason = reason self.verb = verb self.tb = tb - super(HumanReadableException, self).__init__(self.get_message()) + super().__init__(self.get_message()) def _gerund(self): """Generate a (likely) gerund form of the English verb. 
""" - if u' ' in self.verb: + if ' ' in self.verb: return self.verb - gerund = self.verb[:-1] if self.verb.endswith(u'e') else self.verb - gerund += u'ing' + gerund = self.verb[:-1] if self.verb.endswith('e') else self.verb + gerund += 'ing' return gerund def _reasonstr(self): """Get the reason as a string.""" - if isinstance(self.reason, six.text_type): + if isinstance(self.reason, str): return self.reason elif isinstance(self.reason, bytes): return self.reason.decode('utf-8', 'ignore') elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError return self.reason.strerror else: - return u'"{0}"'.format(six.text_type(self.reason)) + return '"{}"'.format(str(self.reason)) def get_message(self): """Create the human-readable description of the error, sans @@ -92,7 +91,7 @@ class HumanReadableException(Exception): """ if self.tb: logger.debug(self.tb) - logger.error(u'{0}: {1}', self.error_kind, self.args[0]) + logger.error('{0}: {1}', self.error_kind, self.args[0]) class FilesystemError(HumanReadableException): @@ -100,29 +99,30 @@ class FilesystemError(HumanReadableException): via a function in this module. The `paths` field is a sequence of pathnames involved in the operation. """ + def __init__(self, reason, verb, paths, tb=None): self.paths = paths - super(FilesystemError, self).__init__(reason, verb, tb) + super().__init__(reason, verb, tb) def get_message(self): # Use a nicer English phrasing for some specific verbs. 
if self.verb in ('move', 'copy', 'rename'): - clause = u'while {0} {1} to {2}'.format( + clause = 'while {} {} to {}'.format( self._gerund(), displayable_path(self.paths[0]), displayable_path(self.paths[1]) ) elif self.verb in ('delete', 'write', 'create', 'read'): - clause = u'while {0} {1}'.format( + clause = 'while {} {}'.format( self._gerund(), displayable_path(self.paths[0]) ) else: - clause = u'during {0} of paths {1}'.format( - self.verb, u', '.join(displayable_path(p) for p in self.paths) + clause = 'during {} of paths {}'.format( + self.verb, ', '.join(displayable_path(p) for p in self.paths) ) - return u'{0} {1}'.format(self._reasonstr(), clause) + return f'{self._reasonstr()} {clause}' class MoveOperation(Enum): @@ -132,6 +132,8 @@ class MoveOperation(Enum): COPY = 1 LINK = 2 HARDLINK = 3 + REFLINK = 4 + REFLINK_AUTO = 5 def normpath(path): @@ -182,7 +184,7 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): contents = os.listdir(syspath(path)) except OSError as exc: if logger: - logger.warning(u'could not list directory {0}: {1}'.format( + logger.warning('could not list directory {}: {}'.format( displayable_path(path), exc.strerror )) return @@ -195,6 +197,10 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): skip = False for pat in ignore: if fnmatch.fnmatch(base, pat): + if logger: + logger.debug('ignoring {} due to ignore rule {}'.format( + base, pat + )) skip = True break if skip: @@ -217,8 +223,14 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): for base in dirs: cur = os.path.join(path, base) # yield from sorted_walk(...) - for res in sorted_walk(cur, ignore, ignore_hidden, logger): - yield res + yield from sorted_walk(cur, ignore, ignore_hidden, logger) + + +def path_as_posix(path): + """Return the string representation of the path with forward (/) + slashes. 
+ """ + return path.replace(b'\\', b'/') def mkdirall(path): @@ -229,7 +241,7 @@ def mkdirall(path): if not os.path.isdir(syspath(ancestor)): try: os.mkdir(syspath(ancestor)) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'create', (ancestor,), traceback.format_exc()) @@ -282,13 +294,13 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): continue clutter = [bytestring_path(c) for c in clutter] match_paths = [bytestring_path(d) for d in os.listdir(directory)] - if fnmatch_all(match_paths, clutter): - # Directory contains only clutter (or nothing). - try: + try: + if fnmatch_all(match_paths, clutter): + # Directory contains only clutter (or nothing). shutil.rmtree(directory) - except OSError: + else: break - else: + except OSError: break @@ -367,18 +379,18 @@ def bytestring_path(path): PATH_SEP = bytestring_path(os.sep) -def displayable_path(path, separator=u'; '): +def displayable_path(path, separator='; '): """Attempts to decode a bytestring path to a unicode object for the purpose of displaying it to the user. If the `path` argument is a list or a tuple, the elements are joined with `separator`. """ if isinstance(path, (list, tuple)): return separator.join(displayable_path(p) for p in path) - elif isinstance(path, six.text_type): + elif isinstance(path, str): return path elif not isinstance(path, bytes): # A non-string object: just get its unicode representation. - return six.text_type(path) + return str(path) try: return path.decode(_fsencoding(), 'ignore') @@ -397,7 +409,7 @@ def syspath(path, prefix=True): if os.path.__name__ != 'ntpath': return path - if not isinstance(path, six.text_type): + if not isinstance(path, str): # Beets currently represents Windows paths internally with UTF-8 # arbitrarily. But earlier versions used MBCS because it is # reported as the FS encoding by Windows. Try both. 
@@ -410,11 +422,11 @@ def syspath(path, prefix=True): path = path.decode(encoding, 'replace') # Add the magic prefix if it isn't already there. - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx + # https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx if prefix and not path.startswith(WINDOWS_MAGIC_PREFIX): - if path.startswith(u'\\\\'): + if path.startswith('\\\\'): # UNC path. Final path should look like \\?\UNC\... - path = u'UNC' + path[1:] + path = 'UNC' + path[1:] path = WINDOWS_MAGIC_PREFIX + path return path @@ -436,7 +448,7 @@ def remove(path, soft=True): return try: os.remove(path) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) @@ -451,10 +463,10 @@ def copy(path, dest, replace=False): path = syspath(path) dest = syspath(dest) if not replace and os.path.exists(dest): - raise FilesystemError(u'file exists', 'copy', (path, dest)) + raise FilesystemError('file exists', 'copy', (path, dest)) try: shutil.copyfile(path, dest) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'copy', (path, dest), traceback.format_exc()) @@ -467,24 +479,37 @@ def move(path, dest, replace=False): instead, in which case metadata will *not* be preserved. Paths are translated to system paths. """ + if os.path.isdir(path): + raise FilesystemError(u'source is directory', 'move', (path, dest)) + if os.path.isdir(dest): + raise FilesystemError(u'destination is directory', 'move', + (path, dest)) if samefile(path, dest): return path = syspath(path) dest = syspath(dest) if os.path.exists(dest) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) # First, try renaming the file. try: - os.rename(path, dest) + os.replace(path, dest) except OSError: - # Otherwise, copy and delete the original. 
+ tmp = tempfile.mktemp(suffix='.beets', + prefix=py3_path(b'.' + os.path.basename(dest)), + dir=py3_path(os.path.dirname(dest))) + tmp = syspath(tmp) try: - shutil.copyfile(path, dest) + shutil.copyfile(path, tmp) + os.replace(tmp, dest) + tmp = None os.remove(path) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'move', (path, dest), traceback.format_exc()) + finally: + if tmp is not None: + os.remove(tmp) def link(path, dest, replace=False): @@ -496,18 +521,18 @@ def link(path, dest, replace=False): return if os.path.exists(syspath(dest)) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) try: os.symlink(syspath(path), syspath(dest)) except NotImplementedError: # raised on python >= 3.2 and Windows versions before Vista - raise FilesystemError(u'OS does not support symbolic links.' + raise FilesystemError('OS does not support symbolic links.' 'link', (path, dest), traceback.format_exc()) except OSError as exc: # TODO: Windows version checks can be removed for python 3 if hasattr('sys', 'getwindowsversion'): if sys.getwindowsversion()[0] < 6: # is before Vista - exc = u'OS does not support symbolic links.' + exc = 'OS does not support symbolic links.' raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) @@ -521,21 +546,50 @@ def hardlink(path, dest, replace=False): return if os.path.exists(syspath(dest)) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) try: os.link(syspath(path), syspath(dest)) except NotImplementedError: - raise FilesystemError(u'OS does not support hard links.' + raise FilesystemError('OS does not support hard links.' 'link', (path, dest), traceback.format_exc()) except OSError as exc: if exc.errno == errno.EXDEV: - raise FilesystemError(u'Cannot hard link across devices.' 
+ raise FilesystemError('Cannot hard link across devices.' 'link', (path, dest), traceback.format_exc()) else: raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) +def reflink(path, dest, replace=False, fallback=False): + """Create a reflink from `dest` to `path`. + + Raise an `OSError` if `dest` already exists, unless `replace` is + True. If `path` == `dest`, then do nothing. + + If reflinking fails and `fallback` is enabled, try copying the file + instead. Otherwise, raise an error without trying a plain copy. + + May raise an `ImportError` if the `reflink` module is not available. + """ + import reflink as pyreflink + + if samefile(path, dest): + return + + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError('file exists', 'rename', (path, dest)) + + try: + pyreflink.reflink(path, dest) + except (NotImplementedError, pyreflink.ReflinkImpossibleError): + if fallback: + copy(path, dest, replace) + else: + raise FilesystemError('OS/filesystem does not support reflinks.', + 'link', (path, dest), traceback.format_exc()) + + def unique_path(path): """Returns a version of ``path`` that does not exist on the filesystem. Specifically, if ``path` itself already exists, then @@ -553,22 +607,23 @@ def unique_path(path): num = 0 while True: num += 1 - suffix = u'.{}'.format(num).encode() + ext + suffix = f'.{num}'.encode() + ext new_path = base + suffix if not os.path.exists(new_path): return new_path + # Note: The Windows "reserved characters" are, of course, allowed on # Unix. They are forbidden here because they cause problems on Samba # shares, which are sufficiently common as to cause frequent problems. -# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx +# https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx CHAR_REPLACE = [ - (re.compile(r'[\\/]'), u'_'), # / and \ -- forbidden everywhere. - (re.compile(r'^\.'), u'_'), # Leading dot (hidden files on Unix). 
- (re.compile(r'[\x00-\x1f]'), u''), # Control characters. - (re.compile(r'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". - (re.compile(r'\.$'), u'_'), # Trailing dots. - (re.compile(r'\s+$'), u''), # Trailing whitespace. + (re.compile(r'[\\/]'), '_'), # / and \ -- forbidden everywhere. + (re.compile(r'^\.'), '_'), # Leading dot (hidden files on Unix). + (re.compile(r'[\x00-\x1f]'), ''), # Control characters. + (re.compile(r'[<>:"\?\*\|]'), '_'), # Windows "reserved characters". + (re.compile(r'\.$'), '_'), # Trailing dots. + (re.compile(r'\s+$'), ''), # Trailing whitespace. ] @@ -692,36 +747,29 @@ def py3_path(path): it is. So this function helps us "smuggle" the true bytes data through APIs that took Python 3's Unicode mandate too seriously. """ - if isinstance(path, six.text_type): + if isinstance(path, str): return path assert isinstance(path, bytes) - if six.PY2: - return path return os.fsdecode(path) def str2bool(value): """Returns a boolean reflecting a human-entered string.""" - return value.lower() in (u'yes', u'1', u'true', u't', u'y') + return value.lower() in ('yes', '1', 'true', 't', 'y') def as_string(value): """Convert a value to a Unicode object for matching with a query. None becomes the empty string. Bytestrings are silently decoded. 
""" - if six.PY2: - buffer_types = buffer, memoryview # noqa: F821 - else: - buffer_types = memoryview - if value is None: - return u'' - elif isinstance(value, buffer_types): + return '' + elif isinstance(value, memoryview): return bytes(value).decode('utf-8', 'ignore') elif isinstance(value, bytes): return value.decode('utf-8', 'ignore') else: - return six.text_type(value) + return str(value) def text_string(value, encoding='utf-8'): @@ -744,7 +792,7 @@ def plurality(objs): """ c = Counter(objs) if not c: - raise ValueError(u'sequence must be non-empty') + raise ValueError('sequence must be non-empty') return c.most_common(1)[0] @@ -761,7 +809,11 @@ def cpu_count(): num = 0 elif sys.platform == 'darwin': try: - num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu'])) + num = int(command_output([ + '/usr/sbin/sysctl', + '-n', + 'hw.ncpu', + ]).stdout) except (ValueError, OSError, subprocess.CalledProcessError): num = 0 else: @@ -781,20 +833,23 @@ def convert_command_args(args): assert isinstance(args, list) def convert(arg): - if six.PY2: - if isinstance(arg, six.text_type): - arg = arg.encode(arg_encoding()) - else: - if isinstance(arg, bytes): - arg = arg.decode(arg_encoding(), 'surrogateescape') + if isinstance(arg, bytes): + arg = arg.decode(arg_encoding(), 'surrogateescape') return arg return [convert(a) for a in args] +# stdout and stderr as bytes +CommandOutput = namedtuple("CommandOutput", ("stdout", "stderr")) + + def command_output(cmd, shell=False): """Runs the command and returns its output after it has exited. + Returns a CommandOutput. The attributes ``stdout`` and ``stderr`` contain + byte strings of the respective output streams. + ``cmd`` is a list of arguments starting with the command names. The arguments are bytes on Unix and strings on Windows. 
If ``shell`` is true, ``cmd`` is assumed to be a string and passed to a @@ -829,7 +884,7 @@ def command_output(cmd, shell=False): cmd=' '.join(cmd), output=stdout + stderr, ) - return stdout + return CommandOutput(stdout, stderr) def max_filename_length(path, limit=MAX_FILENAME_LENGTH): @@ -876,25 +931,6 @@ def editor_command(): return open_anything() -def shlex_split(s): - """Split a Unicode or bytes string according to shell lexing rules. - - Raise `ValueError` if the string is not a well-formed shell string. - This is a workaround for a bug in some versions of Python. - """ - if not six.PY2 or isinstance(s, bytes): # Shlex works fine. - return shlex.split(s) - - elif isinstance(s, six.text_type): - # Work around a Python bug. - # http://bugs.python.org/issue6988 - bs = s.encode('utf-8') - return [c.decode('utf-8') for c in shlex.split(bs)] - - else: - raise TypeError(u'shlex_split called with non-string') - - def interactive_open(targets, command): """Open the files in `targets` by `exec`ing a new `command`, given as a Unicode string. (The new program takes over, and Python @@ -906,7 +942,7 @@ def interactive_open(targets, command): # Split the command string into its arguments. try: - args = shlex_split(command) + args = shlex.split(command) except ValueError: # Malformed shell tokens. args = [command] @@ -921,7 +957,7 @@ def _windows_long_path_name(short_path): """Use Windows' `GetLongPathNameW` via ctypes to get the canonical, long path given a short filename. 
""" - if not isinstance(short_path, six.text_type): + if not isinstance(short_path, str): short_path = short_path.decode(_fsencoding()) import ctypes @@ -982,7 +1018,7 @@ def raw_seconds_short(string): """ match = re.match(r'^(\d+):([0-5]\d)$', string) if not match: - raise ValueError(u'String not in M:SS format') + raise ValueError('String not in M:SS format') minutes, seconds = map(int, match.groups()) return float(minutes * 60 + seconds) @@ -1009,3 +1045,59 @@ def asciify_path(path, sep_replace): sep_replace ) return os.sep.join(path_components) + + +def par_map(transform, items): + """Apply the function `transform` to all the elements in the + iterable `items`, like `map(transform, items)` but with no return + value. The map *might* happen in parallel: it's parallel on Python 3 + and sequential on Python 2. + + The parallelism uses threads (not processes), so this is only useful + for IO-bound `transform`s. + """ + pool = ThreadPool() + pool.map(transform, items) + pool.close() + pool.join() + + +def lazy_property(func): + """A decorator that creates a lazily evaluated property. On first access, + the property is assigned the return value of `func`. This first value is + stored, so that future accesses do not have to evaluate `func` again. + + This behaviour is useful when `func` is expensive to evaluate, and it is + not certain that the result will be needed. + """ + field_name = '_' + func.__name__ + + @property + @functools.wraps(func) + def wrapper(self): + if hasattr(self, field_name): + return getattr(self, field_name) + + value = func(self) + setattr(self, field_name, value) + return value + + return wrapper + + +def decode_commandline_path(path): + """Prepare a path for substitution into commandline template. + + On Python 3, we need to construct the subprocess commands to invoke as a + Unicode string. 
On Unix, this is a little unfortunate---the OS is + expecting bytes---so we use surrogate escaping and decode with the + argument encoding, which is the same encoding that will then be + *reversed* to recover the same bytes before invoking the OS. On + Windows, we want to preserve the Unicode filename "as is." + """ + # On Python 3, the template is a Unicode string, which only supports + # substitution of Unicode variables. + if platform.system() == 'Windows': + return path.decode(_fsencoding()) + else: + return path.decode(arg_encoding(), 'surrogateescape') diff --git a/libs/common/beets/util/artresizer.py b/libs/common/beets/util/artresizer.py index e5117a6a..8683e228 100644 --- a/libs/common/beets/util/artresizer.py +++ b/libs/common/beets/util/artresizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Fabrice Laporte # @@ -16,38 +15,39 @@ """Abstraction layer to resize images using PIL, ImageMagick, or a public resizing proxy if neither is available. """ -from __future__ import division, absolute_import, print_function import subprocess import os +import os.path import re from tempfile import NamedTemporaryFile -from six.moves.urllib.parse import urlencode +from urllib.parse import urlencode from beets import logging from beets import util -import six # Resizing methods PIL = 1 IMAGEMAGICK = 2 WEBPROXY = 3 -if util.SNI_SUPPORTED: - PROXY_URL = 'https://images.weserv.nl/' -else: - PROXY_URL = 'http://images.weserv.nl/' +PROXY_URL = 'https://images.weserv.nl/' log = logging.getLogger('beets') -def resize_url(url, maxwidth): +def resize_url(url, maxwidth, quality=0): """Return a proxied image URL that resizes the original image to maxwidth (preserving aspect ratio). 
""" - return '{0}?{1}'.format(PROXY_URL, urlencode({ + params = { 'url': url.replace('http://', ''), 'w': maxwidth, - })) + } + + if quality > 0: + params['q'] = quality + + return '{}?{}'.format(PROXY_URL, urlencode(params)) def temp_file_for(path): @@ -59,48 +59,102 @@ def temp_file_for(path): return util.bytestring_path(f.name) -def pil_resize(maxwidth, path_in, path_out=None): +def pil_resize(maxwidth, path_in, path_out=None, quality=0, max_filesize=0): """Resize using Python Imaging Library (PIL). Return the output path of resized image. """ path_out = path_out or temp_file_for(path_in) from PIL import Image - log.debug(u'artresizer: PIL resizing {0} to {1}', + + log.debug('artresizer: PIL resizing {0} to {1}', util.displayable_path(path_in), util.displayable_path(path_out)) try: im = Image.open(util.syspath(path_in)) size = maxwidth, maxwidth im.thumbnail(size, Image.ANTIALIAS) - im.save(path_out) - return path_out - except IOError: - log.error(u"PIL cannot create thumbnail for '{0}'", + + if quality == 0: + # Use PIL's default quality. + quality = -1 + + # progressive=False only affects JPEGs and is the default, + # but we include it here for explicitness. + im.save(util.py3_path(path_out), quality=quality, progressive=False) + + if max_filesize > 0: + # If maximum filesize is set, we attempt to lower the quality of + # jpeg conversion by a proportional amount, up to 3 attempts + # First, set the maximum quality to either provided, or 95 + if quality > 0: + lower_qual = quality + else: + lower_qual = 95 + for i in range(5): + # 5 attempts is an abitrary choice + filesize = os.stat(util.syspath(path_out)).st_size + log.debug("PIL Pass {0} : Output size: {1}B", i, filesize) + if filesize <= max_filesize: + return path_out + # The relationship between filesize & quality will be + # image dependent. 
+ lower_qual -= 10 + # Restrict quality dropping below 10 + if lower_qual < 10: + lower_qual = 10 + # Use optimize flag to improve filesize decrease + im.save(util.py3_path(path_out), quality=lower_qual, + optimize=True, progressive=False) + log.warning("PIL Failed to resize file to below {0}B", + max_filesize) + return path_out + + else: + return path_out + except OSError: + log.error("PIL cannot create thumbnail for '{0}'", util.displayable_path(path_in)) return path_in -def im_resize(maxwidth, path_in, path_out=None): - """Resize using ImageMagick's ``convert`` tool. - Return the output path of resized image. +def im_resize(maxwidth, path_in, path_out=None, quality=0, max_filesize=0): + """Resize using ImageMagick. + + Use the ``magick`` program or ``convert`` on older versions. Return + the output path of resized image. """ path_out = path_out or temp_file_for(path_in) - log.debug(u'artresizer: ImageMagick resizing {0} to {1}', + log.debug('artresizer: ImageMagick resizing {0} to {1}', util.displayable_path(path_in), util.displayable_path(path_out)) # "-resize WIDTHx>" shrinks images with the width larger # than the given width while maintaining the aspect ratio # with regards to the height. + # ImageMagick already seems to default to no interlace, but we include it + # here for the sake of explicitness. + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(path_in, prefix=False), + '-resize', f'{maxwidth}x>', + '-interlace', 'none', + ] + + if quality > 0: + cmd += ['-quality', f'{quality}'] + + # "-define jpeg:extent=SIZEb" sets the target filesize for imagemagick to + # SIZE in bytes. 
+ if max_filesize > 0: + cmd += ['-define', f'jpeg:extent={max_filesize}b'] + + cmd.append(util.syspath(path_out, prefix=False)) + try: - util.command_output([ - 'convert', util.syspath(path_in, prefix=False), - '-resize', '{0}x>'.format(maxwidth), - util.syspath(path_out, prefix=False), - ]) + util.command_output(cmd) except subprocess.CalledProcessError: - log.warning(u'artresizer: IM convert failed for {0}', + log.warning('artresizer: IM convert failed for {0}', util.displayable_path(path_in)) return path_in + return path_out @@ -112,31 +166,33 @@ BACKEND_FUNCS = { def pil_getsize(path_in): from PIL import Image + try: im = Image.open(util.syspath(path_in)) return im.size - except IOError as exc: - log.error(u"PIL could not read file {}: {}", + except OSError as exc: + log.error("PIL could not read file {}: {}", util.displayable_path(path_in), exc) def im_getsize(path_in): - cmd = ['identify', '-format', '%w %h', - util.syspath(path_in, prefix=False)] + cmd = ArtResizer.shared.im_identify_cmd + \ + ['-format', '%w %h', util.syspath(path_in, prefix=False)] + try: - out = util.command_output(cmd) + out = util.command_output(cmd).stdout except subprocess.CalledProcessError as exc: - log.warning(u'ImageMagick size query failed') + log.warning('ImageMagick size query failed') log.debug( - u'`convert` exited with (status {}) when ' - u'getting size with command {}:\n{}', + '`convert` exited with (status {}) when ' + 'getting size with command {}:\n{}', exc.returncode, cmd, exc.output.strip() ) return try: return tuple(map(int, out.split(b' '))) except IndexError: - log.warning(u'Could not understand IM output: {0!r}', out) + log.warning('Could not understand IM output: {0!r}', out) BACKEND_GET_SIZE = { @@ -145,14 +201,115 @@ BACKEND_GET_SIZE = { } +def pil_deinterlace(path_in, path_out=None): + path_out = path_out or temp_file_for(path_in) + from PIL import Image + + try: + im = Image.open(util.syspath(path_in)) + im.save(util.py3_path(path_out), progressive=False) + 
return path_out + except IOError: + return path_in + + +def im_deinterlace(path_in, path_out=None): + path_out = path_out or temp_file_for(path_in) + + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(path_in, prefix=False), + '-interlace', 'none', + util.syspath(path_out, prefix=False), + ] + + try: + util.command_output(cmd) + return path_out + except subprocess.CalledProcessError: + return path_in + + +DEINTERLACE_FUNCS = { + PIL: pil_deinterlace, + IMAGEMAGICK: im_deinterlace, +} + + +def im_get_format(filepath): + cmd = ArtResizer.shared.im_identify_cmd + [ + '-format', '%[magick]', + util.syspath(filepath) + ] + + try: + return util.command_output(cmd).stdout + except subprocess.CalledProcessError: + return None + + +def pil_get_format(filepath): + from PIL import Image, UnidentifiedImageError + + try: + with Image.open(util.syspath(filepath)) as im: + return im.format + except (ValueError, TypeError, UnidentifiedImageError, FileNotFoundError): + log.exception("failed to detect image format for {}", filepath) + return None + + +BACKEND_GET_FORMAT = { + PIL: pil_get_format, + IMAGEMAGICK: im_get_format, +} + + +def im_convert_format(source, target, deinterlaced): + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(source), + *(["-interlace", "none"] if deinterlaced else []), + util.syspath(target), + ] + + try: + subprocess.check_call( + cmd, + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL + ) + return target + except subprocess.CalledProcessError: + return source + + +def pil_convert_format(source, target, deinterlaced): + from PIL import Image, UnidentifiedImageError + + try: + with Image.open(util.syspath(source)) as im: + im.save(util.py3_path(target), progressive=not deinterlaced) + return target + except (ValueError, TypeError, UnidentifiedImageError, FileNotFoundError, + OSError): + log.exception("failed to convert image {} -> {}", source, target) + return source + + +BACKEND_CONVERT_IMAGE_FORMAT = { + PIL: pil_convert_format, 
+ IMAGEMAGICK: im_convert_format, +} + + class Shareable(type): """A pseudo-singleton metaclass that allows both shared and non-shared instances. The ``MyClass.shared`` property holds a lazily-created shared instance of ``MyClass`` while calling ``MyClass()`` to construct a new object works as usual. """ + def __init__(cls, name, bases, dict): - super(Shareable, cls).__init__(name, bases, dict) + super().__init__(name, bases, dict) cls._instance = None @property @@ -162,7 +319,7 @@ class Shareable(type): return cls._instance -class ArtResizer(six.with_metaclass(Shareable, object)): +class ArtResizer(metaclass=Shareable): """A singleton class that performs image resizes. """ @@ -170,21 +327,44 @@ class ArtResizer(six.with_metaclass(Shareable, object)): """Create a resizer object with an inferred method. """ self.method = self._check_method() - log.debug(u"artresizer: method is {0}", self.method) + log.debug("artresizer: method is {0}", self.method) self.can_compare = self._can_compare() - def resize(self, maxwidth, path_in, path_out=None): + # Use ImageMagick's magick binary when it's available. If it's + # not, fall back to the older, separate convert and identify + # commands. + if self.method[0] == IMAGEMAGICK: + self.im_legacy = self.method[2] + if self.im_legacy: + self.im_convert_cmd = ['convert'] + self.im_identify_cmd = ['identify'] + else: + self.im_convert_cmd = ['magick'] + self.im_identify_cmd = ['magick', 'identify'] + + def resize( + self, maxwidth, path_in, path_out=None, quality=0, max_filesize=0 + ): """Manipulate an image file according to the method, returning a new path. For PIL or IMAGEMAGIC methods, resizes the image to a - temporary file. For WEBPROXY, returns `path_in` unmodified. + temporary file and encodes with the specified quality level. + For WEBPROXY, returns `path_in` unmodified. 
""" if self.local: func = BACKEND_FUNCS[self.method[0]] - return func(maxwidth, path_in, path_out) + return func(maxwidth, path_in, path_out, + quality=quality, max_filesize=max_filesize) else: return path_in - def proxy_url(self, maxwidth, url): + def deinterlace(self, path_in, path_out=None): + if self.local: + func = DEINTERLACE_FUNCS[self.method[0]] + return func(path_in, path_out) + else: + return path_in + + def proxy_url(self, maxwidth, url, quality=0): """Modifies an image URL according the method, returning a new URL. For WEBPROXY, a URL on the proxy server is returned. Otherwise, the URL is returned unmodified. @@ -192,7 +372,7 @@ class ArtResizer(six.with_metaclass(Shareable, object)): if self.local: return url else: - return resize_url(url, maxwidth) + return resize_url(url, maxwidth, quality) @property def local(self): @@ -205,12 +385,50 @@ class ArtResizer(six.with_metaclass(Shareable, object)): """Return the size of an image file as an int couple (width, height) in pixels. - Only available locally + Only available locally. """ if self.local: func = BACKEND_GET_SIZE[self.method[0]] return func(path_in) + def get_format(self, path_in): + """Returns the format of the image as a string. + + Only available locally. + """ + if self.local: + func = BACKEND_GET_FORMAT[self.method[0]] + return func(path_in) + + def reformat(self, path_in, new_format, deinterlaced=True): + """Converts image to desired format, updating its extension, but + keeping the same filename. + + Only available locally. + """ + if not self.local: + return path_in + + new_format = new_format.lower() + # A nonexhaustive map of image "types" to extensions overrides + new_format = { + 'jpeg': 'jpg', + }.get(new_format, new_format) + + fname, ext = os.path.splitext(path_in) + path_new = fname + b'.' 
+ new_format.encode('utf8') + func = BACKEND_CONVERT_IMAGE_FORMAT[self.method[0]] + + # allows the exception to propagate, while still making sure a changed + # file path was removed + result_path = path_in + try: + result_path = func(path_in, path_new, deinterlaced) + finally: + if result_path != path_in: + os.unlink(path_in) + return result_path + def _can_compare(self): """A boolean indicating whether image comparison is available""" @@ -218,10 +436,20 @@ class ArtResizer(six.with_metaclass(Shareable, object)): @staticmethod def _check_method(): - """Return a tuple indicating an available method and its version.""" + """Return a tuple indicating an available method and its version. + + The result has at least two elements: + - The method, eitehr WEBPROXY, PIL, or IMAGEMAGICK. + - The version. + + If the method is IMAGEMAGICK, there is also a third element: a + bool flag indicating whether to use the `magick` binary or + legacy single-purpose executables (`convert`, `identify`, etc.) + """ version = get_im_version() if version: - return IMAGEMAGICK, version + version, legacy = version + return IMAGEMAGICK, version, legacy version = get_pil_version() if version: @@ -231,31 +459,34 @@ class ArtResizer(six.with_metaclass(Shareable, object)): def get_im_version(): - """Return Image Magick version or None if it is unavailable - Try invoking ImageMagick's "convert". + """Get the ImageMagick version and legacy flag as a pair. Or return + None if ImageMagick is not available. 
""" - try: - out = util.command_output(['convert', '--version']) + for cmd_name, legacy in ((['magick'], False), (['convert'], True)): + cmd = cmd_name + ['--version'] - if b'imagemagick' in out.lower(): - pattern = br".+ (\d+)\.(\d+)\.(\d+).*" - match = re.search(pattern, out) - if match: - return (int(match.group(1)), - int(match.group(2)), - int(match.group(3))) - return (0,) + try: + out = util.command_output(cmd).stdout + except (subprocess.CalledProcessError, OSError) as exc: + log.debug('ImageMagick version check failed: {}', exc) + else: + if b'imagemagick' in out.lower(): + pattern = br".+ (\d+)\.(\d+)\.(\d+).*" + match = re.search(pattern, out) + if match: + version = (int(match.group(1)), + int(match.group(2)), + int(match.group(3))) + return version, legacy - except (subprocess.CalledProcessError, OSError) as exc: - log.debug(u'ImageMagick check `convert --version` failed: {}', exc) - return None + return None def get_pil_version(): - """Return Image Magick version or None if it is unavailable - Try importing PIL.""" + """Get the PIL/Pillow version, or None if it is unavailable. + """ try: - __import__('PIL', fromlist=[str('Image')]) + __import__('PIL', fromlist=['Image']) return (0,) except ImportError: return None diff --git a/libs/common/beets/util/bluelet.py b/libs/common/beets/util/bluelet.py index 0da17559..a40f3b2f 100644 --- a/libs/common/beets/util/bluelet.py +++ b/libs/common/beets/util/bluelet.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Extremely simple pure-Python implementation of coroutine-style asynchronous socket I/O. Inspired by, but inferior to, Eventlet. Bluelet can also be thought of as a less-terrible replacement for @@ -7,9 +5,7 @@ asyncore. Bluelet: easy concurrency without all the messy parallelism. """ -from __future__ import division, absolute_import, print_function -import six import socket import select import sys @@ -22,7 +18,7 @@ import collections # Basic events used for thread scheduling. 
-class Event(object): +class Event: """Just a base class identifying Bluelet events. An event is an object yielded from a Bluelet thread coroutine to suspend operation and communicate with the scheduler. @@ -201,7 +197,7 @@ class ThreadException(Exception): self.exc_info = exc_info def reraise(self): - six.reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + raise self.exc_info[1].with_traceback(self.exc_info[2]) SUSPENDED = Event() # Special sentinel placeholder for suspended threads. @@ -336,16 +332,20 @@ def run(root_coro): break # Wait and fire. - event2coro = dict((v, k) for k, v in threads.items()) + event2coro = {v: k for k, v in threads.items()} for event in _event_select(threads.values()): # Run the IO operation, but catch socket errors. try: value = event.fire() - except socket.error as exc: + except OSError as exc: if isinstance(exc.args, tuple) and \ exc.args[0] == errno.EPIPE: # Broken pipe. Remote host disconnected. pass + elif isinstance(exc.args, tuple) and \ + exc.args[0] == errno.ECONNRESET: + # Connection was reset by peer. + pass else: traceback.print_exc() # Abort the coroutine. @@ -386,7 +386,7 @@ class SocketClosedError(Exception): pass -class Listener(object): +class Listener: """A socket wrapper object for listening sockets. """ def __init__(self, host, port): @@ -416,7 +416,7 @@ class Listener(object): self.sock.close() -class Connection(object): +class Connection: """A socket wrapper object for connected sockets. """ def __init__(self, sock, addr): @@ -541,7 +541,7 @@ def spawn(coro): and child coroutines run concurrently. """ if not isinstance(coro, types.GeneratorType): - raise ValueError(u'%s is not a coroutine' % coro) + raise ValueError('%s is not a coroutine' % coro) return SpawnEvent(coro) @@ -551,7 +551,7 @@ def call(coro): returns a value using end(), then this event returns that value. 
""" if not isinstance(coro, types.GeneratorType): - raise ValueError(u'%s is not a coroutine' % coro) + raise ValueError('%s is not a coroutine' % coro) return DelegationEvent(coro) diff --git a/libs/common/beets/util/confit.py b/libs/common/beets/util/confit.py index b5513f48..dd912c44 100644 --- a/libs/common/beets/util/confit.py +++ b/libs/common/beets/util/confit.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- -# This file is part of Confuse. -# Copyright 2016, Adrian Sampson. +# This file is part of beets. +# Copyright 2016-2019, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,1501 +12,17 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""Worry-free YAML configuration files. -""" -from __future__ import division, absolute_import, print_function -import platform -import os -import pkgutil -import sys -import yaml -import collections -import re -from collections import OrderedDict +import confuse -UNIX_DIR_VAR = 'XDG_CONFIG_HOME' -UNIX_DIR_FALLBACK = '~/.config' -WINDOWS_DIR_VAR = 'APPDATA' -WINDOWS_DIR_FALLBACK = '~\\AppData\\Roaming' -MAC_DIR = '~/Library/Application Support' +import warnings +warnings.warn("beets.util.confit is deprecated; use confuse instead") -CONFIG_FILENAME = 'config.yaml' -DEFAULT_FILENAME = 'config_default.yaml' -ROOT_NAME = 'root' +# Import everything from the confuse module into this module. +for key, value in confuse.__dict__.items(): + if key not in ['__name__']: + globals()[key] = value -YAML_TAB_PROBLEM = "found character '\\t' that cannot start any token" -REDACTED_TOMBSTONE = 'REDACTED' - - -# Utilities. 
- -PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode # noqa: F821 -BASESTRING = str if PY3 else basestring # noqa: F821 -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) # noqa: F821 - - -def iter_first(sequence): - """Get the first element from an iterable or raise a ValueError if - the iterator generates no values. - """ - it = iter(sequence) - try: - return next(it) - except StopIteration: - raise ValueError() - - -# Exceptions. - -class ConfigError(Exception): - """Base class for exceptions raised when querying a configuration. - """ - - -class NotFoundError(ConfigError): - """A requested value could not be found in the configuration trees. - """ - - -class ConfigValueError(ConfigError): - """The value in the configuration is illegal.""" - - -class ConfigTypeError(ConfigValueError): - """The value in the configuration did not match the expected type. - """ - - -class ConfigTemplateError(ConfigError): - """Base class for exceptions raised because of an invalid template. - """ - - -class ConfigReadError(ConfigError): - """A configuration file could not be read.""" - def __init__(self, filename, reason=None): - self.filename = filename - self.reason = reason - - message = u'file {0} could not be read'.format(filename) - if isinstance(reason, yaml.scanner.ScannerError) and \ - reason.problem == YAML_TAB_PROBLEM: - # Special-case error message for tab indentation in YAML markup. - message += u': found tab character at line {0}, column {1}'.format( - reason.problem_mark.line + 1, - reason.problem_mark.column + 1, - ) - elif reason: - # Generic error message uses exception's message. - message += u': {0}'.format(reason) - - super(ConfigReadError, self).__init__(message) - - -# Views and sources. - -class ConfigSource(dict): - """A dictionary augmented with metadata about the source of the - configuration. 
- """ - def __init__(self, value, filename=None, default=False): - super(ConfigSource, self).__init__(value) - if filename is not None and not isinstance(filename, BASESTRING): - raise TypeError(u'filename must be a string or None') - self.filename = filename - self.default = default - - def __repr__(self): - return 'ConfigSource({0!r}, {1!r}, {2!r})'.format( - super(ConfigSource, self), - self.filename, - self.default, - ) - - @classmethod - def of(cls, value): - """Given either a dictionary or a `ConfigSource` object, return - a `ConfigSource` object. This lets a function accept either type - of object as an argument. - """ - if isinstance(value, ConfigSource): - return value - elif isinstance(value, dict): - return ConfigSource(value) - else: - raise TypeError(u'source value must be a dict') - - -class ConfigView(object): - """A configuration "view" is a query into a program's configuration - data. A view represents a hypothetical location in the configuration - tree; to extract the data from the location, a client typically - calls the ``view.get()`` method. The client can access children in - the tree (subviews) by subscripting the parent view (i.e., - ``view[key]``). - """ - - name = None - """The name of the view, depicting the path taken through the - configuration in Python-like syntax (e.g., ``foo['bar'][42]``). - """ - - def resolve(self): - """The core (internal) data retrieval method. Generates (value, - source) pairs for each source that contains a value for this - view. May raise ConfigTypeError if a type error occurs while - traversing a source. - """ - raise NotImplementedError - - def first(self): - """Return a (value, source) pair for the first object found for - this view. This amounts to the first element returned by - `resolve`. If no values are available, a NotFoundError is - raised. 
- """ - pairs = self.resolve() - try: - return iter_first(pairs) - except ValueError: - raise NotFoundError(u"{0} not found".format(self.name)) - - def exists(self): - """Determine whether the view has a setting in any source. - """ - try: - self.first() - except NotFoundError: - return False - return True - - def add(self, value): - """Set the *default* value for this configuration view. The - specified value is added as the lowest-priority configuration - data source. - """ - raise NotImplementedError - - def set(self, value): - """*Override* the value for this configuration view. The - specified value is added as the highest-priority configuration - data source. - """ - raise NotImplementedError - - def root(self): - """The RootView object from which this view is descended. - """ - raise NotImplementedError - - def __repr__(self): - return '<{}: {}>'.format(self.__class__.__name__, self.name) - - def __iter__(self): - """Iterate over the keys of a dictionary view or the *subviews* - of a list view. - """ - # Try getting the keys, if this is a dictionary view. - try: - keys = self.keys() - for key in keys: - yield key - - except ConfigTypeError: - # Otherwise, try iterating over a list. - collection = self.get() - if not isinstance(collection, (list, tuple)): - raise ConfigTypeError( - u'{0} must be a dictionary or a list, not {1}'.format( - self.name, type(collection).__name__ - ) - ) - - # Yield all the indices in the list. - for index in range(len(collection)): - yield self[index] - - def __getitem__(self, key): - """Get a subview of this view.""" - return Subview(self, key) - - def __setitem__(self, key, value): - """Create an overlay source to assign a given key under this - view. - """ - self.set({key: value}) - - def __contains__(self, key): - return self[key].exists() - - def set_args(self, namespace): - """Overlay parsed command-line arguments, generated by a library - like argparse or optparse, onto this view's value. 
``namespace`` - can be a ``dict`` or namespace object. - """ - args = {} - if isinstance(namespace, dict): - items = namespace.items() - else: - items = namespace.__dict__.items() - for key, value in items: - if value is not None: # Avoid unset options. - args[key] = value - self.set(args) - - # Magical conversions. These special methods make it possible to use - # View objects somewhat transparently in certain circumstances. For - # example, rather than using ``view.get(bool)``, it's possible to - # just say ``bool(view)`` or use ``view`` in a conditional. - - def __str__(self): - """Get the value for this view as a bytestring. - """ - if PY3: - return self.__unicode__() - else: - return bytes(self.get()) - - def __unicode__(self): - """Get the value for this view as a Unicode string. - """ - return STRING(self.get()) - - def __nonzero__(self): - """Gets the value for this view as a boolean. (Python 2 only.) - """ - return self.__bool__() - - def __bool__(self): - """Gets the value for this view as a boolean. (Python 3 only.) - """ - return bool(self.get()) - - # Dictionary emulation methods. - - def keys(self): - """Returns a list containing all the keys available as subviews - of the current views. This enumerates all the keys in *all* - dictionaries matching the current view, in contrast to - ``view.get(dict).keys()``, which gets all the keys for the - *first* dict matching the view. If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. The - keys are ordered according to how they appear in each source. - """ - keys = [] - - for dic, _ in self.resolve(): - try: - cur_keys = dic.keys() - except AttributeError: - raise ConfigTypeError( - u'{0} must be a dict, not {1}'.format( - self.name, type(dic).__name__ - ) - ) - - for key in cur_keys: - if key not in keys: - keys.append(key) - - return keys - - def items(self): - """Iterates over (key, subview) pairs contained in dictionaries - from *all* sources at this view. 
If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield key, self[key] - - def values(self): - """Iterates over all the subviews contained in dictionaries from - *all* sources at this view. If the object for this view in any - source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield self[key] - - # List/sequence emulation. - - def all_contents(self): - """Iterates over all subviews from collections at this view from - *all* sources. If the object for this view in any source is not - iterable, then a ConfigTypeError is raised. This method is - intended to be used when the view indicates a list; this method - will concatenate the contents of the list from all sources. - """ - for collection, _ in self.resolve(): - try: - it = iter(collection) - except TypeError: - raise ConfigTypeError( - u'{0} must be an iterable, not {1}'.format( - self.name, type(collection).__name__ - ) - ) - for value in it: - yield value - - # Validation and conversion. - - def flatten(self, redact=False): - """Create a hierarchy of OrderedDicts containing the data from - this view, recursively reifying all views to get their - represented values. - - If `redact` is set, then sensitive values are replaced with - the string "REDACTED". - """ - od = OrderedDict() - for key, view in self.items(): - if redact and view.redact: - od[key] = REDACTED_TOMBSTONE - else: - try: - od[key] = view.flatten(redact=redact) - except ConfigTypeError: - od[key] = view.get() - return od - - def get(self, template=None): - """Retrieve the value for this view according to the template. - - The `template` against which the values are checked can be - anything convertible to a `Template` using `as_template`. This - means you can pass in a default integer or string value, for - example, or a type to just check that something matches the type - you expect. 
- - May raise a `ConfigValueError` (or its subclass, - `ConfigTypeError`) or a `NotFoundError` when the configuration - doesn't satisfy the template. - """ - return as_template(template).value(self, template) - - # Shortcuts for common templates. - - def as_filename(self): - """Get the value as a path. Equivalent to `get(Filename())`. - """ - return self.get(Filename()) - - def as_choice(self, choices): - """Get the value from a list of choices. Equivalent to - `get(Choice(choices))`. - """ - return self.get(Choice(choices)) - - def as_number(self): - """Get the value as any number type: int or float. Equivalent to - `get(Number())`. - """ - return self.get(Number()) - - def as_str_seq(self, split=True): - """Get the value as a sequence of strings. Equivalent to - `get(StrSeq())`. - """ - return self.get(StrSeq(split=split)) - - def as_pairs(self, default_value=None): - """Get the value as a sequence of pairs of two strings. Equivalent to - `get(Pairs())`. - """ - return self.get(Pairs(default_value=default_value)) - - def as_str(self): - """Get the value as a (Unicode) string. Equivalent to - `get(unicode)` on Python 2 and `get(str)` on Python 3. - """ - return self.get(String()) - - # Redaction. - - @property - def redact(self): - """Whether the view contains sensitive information and should be - redacted from output. - """ - return () in self.get_redactions() - - @redact.setter - def redact(self, flag): - self.set_redaction((), flag) - - def set_redaction(self, path, flag): - """Add or remove a redaction for a key path, which should be an - iterable of keys. - """ - raise NotImplementedError() - - def get_redactions(self): - """Get the set of currently-redacted sub-key-paths at this view. - """ - raise NotImplementedError() - - -class RootView(ConfigView): - """The base of a view hierarchy. This view keeps track of the - sources that may be accessed by subviews. - """ - def __init__(self, sources): - """Create a configuration hierarchy for a list of sources. 
At - least one source must be provided. The first source in the list - has the highest priority. - """ - self.sources = list(sources) - self.name = ROOT_NAME - self.redactions = set() - - def add(self, obj): - self.sources.append(ConfigSource.of(obj)) - - def set(self, value): - self.sources.insert(0, ConfigSource.of(value)) - - def resolve(self): - return ((dict(s), s) for s in self.sources) - - def clear(self): - """Remove all sources (and redactions) from this - configuration. - """ - del self.sources[:] - self.redactions.clear() - - def root(self): - return self - - def set_redaction(self, path, flag): - if flag: - self.redactions.add(path) - elif path in self.redactions: - self.redactions.remove(path) - - def get_redactions(self): - return self.redactions - - -class Subview(ConfigView): - """A subview accessed via a subscript of a parent view.""" - def __init__(self, parent, key): - """Make a subview of a parent view for a given subscript key. - """ - self.parent = parent - self.key = key - - # Choose a human-readable name for this view. - if isinstance(self.parent, RootView): - self.name = '' - else: - self.name = self.parent.name - if not isinstance(self.key, int): - self.name += '.' - if isinstance(self.key, int): - self.name += u'#{0}'.format(self.key) - elif isinstance(self.key, bytes): - self.name += self.key.decode('utf-8') - elif isinstance(self.key, STRING): - self.name += self.key - else: - self.name += repr(self.key) - - def resolve(self): - for collection, source in self.parent.resolve(): - try: - value = collection[self.key] - except IndexError: - # List index out of bounds. - continue - except KeyError: - # Dict key does not exist. - continue - except TypeError: - # Not subscriptable. 
- raise ConfigTypeError( - u"{0} must be a collection, not {1}".format( - self.parent.name, type(collection).__name__ - ) - ) - yield value, source - - def set(self, value): - self.parent.set({self.key: value}) - - def add(self, value): - self.parent.add({self.key: value}) - - def root(self): - return self.parent.root() - - def set_redaction(self, path, flag): - self.parent.set_redaction((self.key,) + path, flag) - - def get_redactions(self): - return (kp[1:] for kp in self.parent.get_redactions() - if kp and kp[0] == self.key) - - -# Config file paths, including platform-specific paths and in-package -# defaults. - -# Based on get_root_path from Flask by Armin Ronacher. -def _package_path(name): - """Returns the path to the package containing the named module or - None if the path could not be identified (e.g., if - ``name == "__main__"``). - """ - loader = pkgutil.get_loader(name) - if loader is None or name == '__main__': - return None - - if hasattr(loader, 'get_filename'): - filepath = loader.get_filename(name) - else: - # Fall back to importing the specified module. - __import__(name) - filepath = sys.modules[name].__file__ - - return os.path.dirname(os.path.abspath(filepath)) - - -def config_dirs(): - """Return a platform-specific list of candidates for user - configuration directories on the system. - - The candidates are in order of priority, from highest to lowest. The - last element is the "fallback" location to be used when no - higher-priority config file exists. - """ - paths = [] - - if platform.system() == 'Darwin': - paths.append(MAC_DIR) - paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - elif platform.system() == 'Windows': - paths.append(WINDOWS_DIR_FALLBACK) - if WINDOWS_DIR_VAR in os.environ: - paths.append(os.environ[WINDOWS_DIR_VAR]) - - else: - # Assume Unix. 
- paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - # Expand and deduplicate paths. - out = [] - for path in paths: - path = os.path.abspath(os.path.expanduser(path)) - if path not in out: - out.append(path) - return out - - -# YAML loading. - -class Loader(yaml.SafeLoader): - """A customized YAML loader. This loader deviates from the official - YAML spec in a few convenient ways: - - - All strings as are Unicode objects. - - All maps are OrderedDicts. - - Strings can begin with % without quotation. - """ - # All strings should be Unicode objects, regardless of contents. - def _construct_unicode(self, node): - return self.construct_scalar(node) - - # Use ordered dictionaries for every YAML map. - # From https://gist.github.com/844388 - def construct_yaml_map(self, node): - data = OrderedDict() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_mapping(self, node, deep=False): - if isinstance(node, yaml.MappingNode): - self.flatten_mapping(node) - else: - raise yaml.constructor.ConstructorError( - None, None, - u'expected a mapping node, but found %s' % node.id, - node.start_mark - ) - - mapping = OrderedDict() - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError as exc: - raise yaml.constructor.ConstructorError( - u'while constructing a mapping', - node.start_mark, 'found unacceptable key (%s)' % exc, - key_node.start_mark - ) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - # Allow bare strings to begin with %. Directives are still detected. 
- def check_plain(self): - plain = super(Loader, self).check_plain() - return plain or self.peek() == '%' - - -Loader.add_constructor('tag:yaml.org,2002:str', Loader._construct_unicode) -Loader.add_constructor('tag:yaml.org,2002:map', Loader.construct_yaml_map) -Loader.add_constructor('tag:yaml.org,2002:omap', Loader.construct_yaml_map) - - -def load_yaml(filename): - """Read a YAML document from a file. If the file cannot be read or - parsed, a ConfigReadError is raised. - """ - try: - with open(filename, 'rb') as f: - return yaml.load(f, Loader=Loader) - except (IOError, yaml.error.YAMLError) as exc: - raise ConfigReadError(filename, exc) - - -# YAML dumping. - -class Dumper(yaml.SafeDumper): - """A PyYAML Dumper that represents OrderedDicts as ordinary mappings - (in order, of course). - """ - # From http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = yaml.MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = False - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, yaml.ScalarNode) and - not node_key.style): - best_style = False - if not (isinstance(node_value, yaml.ScalarNode) and - not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_list(self, data): - """If a list has less than 4 items, represent it in inline style - (i.e. comma separated, within square brackets). 
- """ - node = super(Dumper, self).represent_list(data) - length = len(data) - if self.default_flow_style is None and length < 4: - node.flow_style = True - elif self.default_flow_style is None: - node.flow_style = False - return node - - def represent_bool(self, data): - """Represent bool as 'yes' or 'no' instead of 'true' or 'false'. - """ - if data: - value = u'yes' - else: - value = u'no' - return self.represent_scalar('tag:yaml.org,2002:bool', value) - - def represent_none(self, data): - """Represent a None value with nothing instead of 'none'. - """ - return self.represent_scalar('tag:yaml.org,2002:null', '') - - -Dumper.add_representer(OrderedDict, Dumper.represent_dict) -Dumper.add_representer(bool, Dumper.represent_bool) -Dumper.add_representer(type(None), Dumper.represent_none) -Dumper.add_representer(list, Dumper.represent_list) - - -def restore_yaml_comments(data, default_data): - """Scan default_data for comments (we include empty lines in our - definition of comments) and place them before the same keys in data. - Only works with comments that are on one or more own lines, i.e. - not next to a yaml mapping. - """ - comment_map = dict() - default_lines = iter(default_data.splitlines()) - for line in default_lines: - if not line: - comment = "\n" - elif line.startswith("#"): - comment = "{0}\n".format(line) - else: - continue - while True: - line = next(default_lines) - if line and not line.startswith("#"): - break - comment += "{0}\n".format(line) - key = line.split(':')[0].strip() - comment_map[key] = comment - out_lines = iter(data.splitlines()) - out_data = "" - for line in out_lines: - key = line.split(':')[0].strip() - if key in comment_map: - out_data += comment_map[key] - out_data += "{0}\n".format(line) - return out_data - - -# Main interface. 
- -class Configuration(RootView): - def __init__(self, appname, modname=None, read=True): - """Create a configuration object by reading the - automatically-discovered config files for the application for a - given name. If `modname` is specified, it should be the import - name of a module whose package will be searched for a default - config file. (Otherwise, no defaults are used.) Pass `False` for - `read` to disable automatic reading of all discovered - configuration files. Use this when creating a configuration - object at module load time and then call the `read` method - later. - """ - super(Configuration, self).__init__([]) - self.appname = appname - self.modname = modname - - self._env_var = '{0}DIR'.format(self.appname.upper()) - - if read: - self.read() - - def user_config_path(self): - """Points to the location of the user configuration. - - The file may not exist. - """ - return os.path.join(self.config_dir(), CONFIG_FILENAME) - - def _add_user_source(self): - """Add the configuration options from the YAML file in the - user's configuration directory (given by `config_dir`) if it - exists. - """ - filename = self.user_config_path() - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename) or {}, filename)) - - def _add_default_source(self): - """Add the package's default configuration settings. This looks - for a YAML file located inside the package for the module - `modname` if it was given. - """ - if self.modname: - pkg_path = _package_path(self.modname) - if pkg_path: - filename = os.path.join(pkg_path, DEFAULT_FILENAME) - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename), filename, True)) - - def read(self, user=True, defaults=True): - """Find and read the files for this configuration and set them - as the sources for this configuration. To disable either - discovered user configuration files or the in-package defaults, - set `user` or `defaults` to `False`. 
- """ - if user: - self._add_user_source() - if defaults: - self._add_default_source() - - def config_dir(self): - """Get the path to the user configuration directory. The - directory is guaranteed to exist as a postcondition (one may be - created if none exist). - - If the application's ``...DIR`` environment variable is set, it - is used as the configuration directory. Otherwise, - platform-specific standard configuration locations are searched - for a ``config.yaml`` file. If no configuration file is found, a - fallback path is used. - """ - # If environment variable is set, use it. - if self._env_var in os.environ: - appdir = os.environ[self._env_var] - appdir = os.path.abspath(os.path.expanduser(appdir)) - if os.path.isfile(appdir): - raise ConfigError(u'{0} must be a directory'.format( - self._env_var - )) - - else: - # Search platform-specific locations. If no config file is - # found, fall back to the final directory in the list. - for confdir in config_dirs(): - appdir = os.path.join(confdir, self.appname) - if os.path.isfile(os.path.join(appdir, CONFIG_FILENAME)): - break - - # Ensure that the directory exists. - if not os.path.isdir(appdir): - os.makedirs(appdir) - return appdir - - def set_file(self, filename): - """Parses the file as YAML and inserts it into the configuration - sources with highest priority. - """ - filename = os.path.abspath(filename) - self.set(ConfigSource(load_yaml(filename), filename)) - - def dump(self, full=True, redact=False): - """Dump the Configuration object to a YAML file. - - The order of the keys is determined from the default - configuration file. All keys not in the default configuration - will be appended to the end of the file. 
- - :param filename: The file to dump the configuration to, or None - if the YAML string should be returned instead - :type filename: unicode - :param full: Dump settings that don't differ from the defaults - as well - :param redact: Remove sensitive information (views with the `redact` - flag set) from the output - """ - if full: - out_dict = self.flatten(redact=redact) - else: - # Exclude defaults when flattening. - sources = [s for s in self.sources if not s.default] - temp_root = RootView(sources) - temp_root.redactions = self.redactions - out_dict = temp_root.flatten(redact=redact) - - yaml_out = yaml.dump(out_dict, Dumper=Dumper, - default_flow_style=None, indent=4, - width=1000) - - # Restore comments to the YAML text. - default_source = None - for source in self.sources: - if source.default: - default_source = source - break - if default_source and default_source.filename: - with open(default_source.filename, 'rb') as fp: - default_data = fp.read() - yaml_out = restore_yaml_comments(yaml_out, - default_data.decode('utf8')) - - return yaml_out - - -class LazyConfig(Configuration): - """A Configuration at reads files on demand when it is first - accessed. This is appropriate for using as a global config object at - the module level. - """ - def __init__(self, appname, modname=None): - super(LazyConfig, self).__init__(appname, modname, False) - self._materialized = False # Have we read the files yet? - self._lazy_prefix = [] # Pre-materialization calls to set(). - self._lazy_suffix = [] # Calls to add(). - - def read(self, user=True, defaults=True): - self._materialized = True - super(LazyConfig, self).read(user, defaults) - - def resolve(self): - if not self._materialized: - # Read files and unspool buffers. - self.read() - self.sources += self._lazy_suffix - self.sources[:0] = self._lazy_prefix - return super(LazyConfig, self).resolve() - - def add(self, value): - super(LazyConfig, self).add(value) - if not self._materialized: - # Buffer additions to end. 
- self._lazy_suffix += self.sources - del self.sources[:] - - def set(self, value): - super(LazyConfig, self).set(value) - if not self._materialized: - # Buffer additions to beginning. - self._lazy_prefix[:0] = self.sources - del self.sources[:] - - def clear(self): - """Remove all sources from this configuration.""" - super(LazyConfig, self).clear() - self._lazy_suffix = [] - self._lazy_prefix = [] - - -# "Validated" configuration views: experimental! - - -REQUIRED = object() -"""A sentinel indicating that there is no default value and an exception -should be raised when the value is missing. -""" - - -class Template(object): - """A value template for configuration fields. - - The template works like a type and instructs Confuse about how to - interpret a deserialized YAML value. This includes type conversions, - providing a default value, and validating for errors. For example, a - filepath type might expand tildes and check that the file exists. - """ - def __init__(self, default=REQUIRED): - """Create a template with a given default value. - - If `default` is the sentinel `REQUIRED` (as it is by default), - then an error will be raised when a value is missing. Otherwise, - missing values will instead return `default`. - """ - self.default = default - - def __call__(self, view): - """Invoking a template on a view gets the view's value according - to the template. - """ - return self.value(view, self) - - def value(self, view, template=None): - """Get the value for a `ConfigView`. - - May raise a `NotFoundError` if the value is missing (and the - template requires it) or a `ConfigValueError` for invalid values. - """ - if view.exists(): - value, _ = view.first() - return self.convert(value, view) - elif self.default is REQUIRED: - # Missing required value. This is an error. - raise NotFoundError(u"{0} not found".format(view.name)) - else: - # Missing value, but not required. 
- return self.default - - def convert(self, value, view): - """Convert the YAML-deserialized value to a value of the desired - type. - - Subclasses should override this to provide useful conversions. - May raise a `ConfigValueError` when the configuration is wrong. - """ - # Default implementation does no conversion. - return value - - def fail(self, message, view, type_error=False): - """Raise an exception indicating that a value cannot be - accepted. - - `type_error` indicates whether the error is due to a type - mismatch rather than a malformed value. In this case, a more - specific exception is raised. - """ - exc_class = ConfigTypeError if type_error else ConfigValueError - raise exc_class( - u'{0}: {1}'.format(view.name, message) - ) - - def __repr__(self): - return '{0}({1})'.format( - type(self).__name__, - '' if self.default is REQUIRED else repr(self.default), - ) - - -class Integer(Template): - """An integer configuration value template. - """ - def convert(self, value, view): - """Check that the value is an integer. Floats are rounded. - """ - if isinstance(value, int): - return value - elif isinstance(value, float): - return int(value) - else: - self.fail(u'must be a number', view, True) - - -class Number(Template): - """A numeric type: either an integer or a floating-point number. - """ - def convert(self, value, view): - """Check that the value is an int or a float. - """ - if isinstance(value, NUMERIC_TYPES): - return value - else: - self.fail( - u'must be numeric, not {0}'.format(type(value).__name__), - view, - True - ) - - -class MappingTemplate(Template): - """A template that uses a dictionary to specify other types for the - values for a set of keys and produce a validated `AttrDict`. - """ - def __init__(self, mapping): - """Create a template according to a dict (mapping). The - mapping's values should themselves either be Types or - convertible to Types. 
- """ - subtemplates = {} - for key, typ in mapping.items(): - subtemplates[key] = as_template(typ) - self.subtemplates = subtemplates - - def value(self, view, template=None): - """Get a dict with the same keys as the template and values - validated according to the value types. - """ - out = AttrDict() - for key, typ in self.subtemplates.items(): - out[key] = typ.value(view[key], self) - return out - - def __repr__(self): - return 'MappingTemplate({0})'.format(repr(self.subtemplates)) - - -class String(Template): - """A string configuration value template. - """ - def __init__(self, default=REQUIRED, pattern=None): - """Create a template with the added optional `pattern` argument, - a regular expression string that the value should match. - """ - super(String, self).__init__(default) - self.pattern = pattern - if pattern: - self.regex = re.compile(pattern) - - def __repr__(self): - args = [] - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - if self.pattern is not None: - args.append('pattern=' + repr(self.pattern)) - - return 'String({0})'.format(', '.join(args)) - - def convert(self, value, view): - """Check that the value is a string and matches the pattern. - """ - if isinstance(value, BASESTRING): - if self.pattern and not self.regex.match(value): - self.fail( - u"must match the pattern {0}".format(self.pattern), - view - ) - return value - else: - self.fail(u'must be a string', view, True) - - -class Choice(Template): - """A template that permits values from a sequence of choices. - """ - def __init__(self, choices): - """Create a template that validates any of the values from the - iterable `choices`. - - If `choices` is a map, then the corresponding value is emitted. - Otherwise, the value itself is emitted. - """ - self.choices = choices - - def convert(self, value, view): - """Ensure that the value is among the choices (and remap if the - choices are a mapping). 
- """ - if value not in self.choices: - self.fail( - u'must be one of {0}, not {1}'.format( - repr(list(self.choices)), repr(value) - ), - view - ) - - if isinstance(self.choices, collections.Mapping): - return self.choices[value] - else: - return value - - def __repr__(self): - return 'Choice({0!r})'.format(self.choices) - - -class OneOf(Template): - """A template that permits values complying to one of the given templates. - """ - def __init__(self, allowed, default=REQUIRED): - super(OneOf, self).__init__(default) - self.allowed = list(allowed) - - def __repr__(self): - args = [] - - if self.allowed is not None: - args.append('allowed=' + repr(self.allowed)) - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - return 'OneOf({0})'.format(', '.join(args)) - - def value(self, view, template): - self.template = template - return super(OneOf, self).value(view, template) - - def convert(self, value, view): - """Ensure that the value follows at least one template. - """ - is_mapping = isinstance(self.template, MappingTemplate) - - for candidate in self.allowed: - try: - if is_mapping: - if isinstance(candidate, Filename) and \ - candidate.relative_to: - next_template = candidate.template_with_relatives( - view, - self.template - ) - - next_template.subtemplates[view.key] = as_template( - candidate - ) - else: - next_template = MappingTemplate({view.key: candidate}) - - return view.parent.get(next_template)[view.key] - else: - return view.get(candidate) - except ConfigTemplateError: - raise - except ConfigError: - pass - except ValueError as exc: - raise ConfigTemplateError(exc) - - self.fail( - u'must be one of {0}, not {1}'.format( - repr(self.allowed), repr(value) - ), - view - ) - - -class StrSeq(Template): - """A template for values that are lists of strings. - - Validates both actual YAML string lists and single strings. Strings - can optionally be split on whitespace. - """ - def __init__(self, split=True): - """Create a new template. 
- - `split` indicates whether, when the underlying value is a single - string, it should be split on whitespace. Otherwise, the - resulting value is a list containing a single string. - """ - super(StrSeq, self).__init__() - self.split = split - - def _convert_value(self, x, view): - if isinstance(x, STRING): - return x - elif isinstance(x, bytes): - return x.decode('utf-8', 'ignore') - else: - self.fail(u'must be a list of strings', view, True) - - def convert(self, value, view): - if isinstance(value, bytes): - value = value.decode('utf-8', 'ignore') - - if isinstance(value, STRING): - if self.split: - value = value.split() - else: - value = [value] - else: - try: - value = list(value) - except TypeError: - self.fail(u'must be a whitespace-separated string or a list', - view, True) - - return [self._convert_value(v, view) for v in value] - - -class Pairs(StrSeq): - """A template for ordered key-value pairs. - - This can either be given with the same syntax as for `StrSeq` (i.e. without - values), or as a list of strings and/or single-element mappings such as:: - - - key: value - - [key, value] - - key - - The result is a list of two-element tuples. If no value is provided, the - `default_value` will be returned as the second element. - """ - - def __init__(self, default_value=None): - """Create a new template. - - `default` is the dictionary value returned for items that are not - a mapping, but a single string. - """ - super(Pairs, self).__init__(split=True) - self.default_value = default_value - - def _convert_value(self, x, view): - try: - return (super(Pairs, self)._convert_value(x, view), - self.default_value) - except ConfigTypeError: - if isinstance(x, collections.Mapping): - if len(x) != 1: - self.fail(u'must be a single-element mapping', view, True) - k, v = iter_first(x.items()) - elif isinstance(x, collections.Sequence): - if len(x) != 2: - self.fail(u'must be a two-element list', view, True) - k, v = x - else: - # Is this even possible? 
-> Likely, if some !directive cause - # YAML to parse this to some custom type. - self.fail(u'must be a single string, mapping, or a list' - u'' + str(x), - view, True) - return (super(Pairs, self)._convert_value(k, view), - super(Pairs, self)._convert_value(v, view)) - - -class Filename(Template): - """A template that validates strings as filenames. - - Filenames are returned as absolute, tilde-free paths. - - Relative paths are relative to the template's `cwd` argument - when it is specified, then the configuration directory (see - the `config_dir` method) if they come from a file. Otherwise, - they are relative to the current working directory. This helps - attain the expected behavior when using command-line options. - """ - def __init__(self, default=REQUIRED, cwd=None, relative_to=None, - in_app_dir=False): - """`relative_to` is the name of a sibling value that is - being validated at the same time. - - `in_app_dir` indicates whether the path should be resolved - inside the application's config directory (even when the setting - does not come from a file). - """ - super(Filename, self).__init__(default) - self.cwd = cwd - self.relative_to = relative_to - self.in_app_dir = in_app_dir - - def __repr__(self): - args = [] - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - if self.cwd is not None: - args.append('cwd=' + repr(self.cwd)) - - if self.relative_to is not None: - args.append('relative_to=' + repr(self.relative_to)) - - if self.in_app_dir: - args.append('in_app_dir=True') - - return 'Filename({0})'.format(', '.join(args)) - - def resolve_relative_to(self, view, template): - if not isinstance(template, (collections.Mapping, MappingTemplate)): - # disallow config.get(Filename(relative_to='foo')) - raise ConfigTemplateError( - u'relative_to may only be used when getting multiple values.' 
- ) - - elif self.relative_to == view.key: - raise ConfigTemplateError( - u'{0} is relative to itself'.format(view.name) - ) - - elif self.relative_to not in view.parent.keys(): - # self.relative_to is not in the config - self.fail( - ( - u'needs sibling value "{0}" to expand relative path' - ).format(self.relative_to), - view - ) - - old_template = {} - old_template.update(template.subtemplates) - - # save time by skipping MappingTemplate's init loop - next_template = MappingTemplate({}) - next_relative = self.relative_to - - # gather all the needed templates and nothing else - while next_relative is not None: - try: - # pop to avoid infinite loop because of recursive - # relative paths - rel_to_template = old_template.pop(next_relative) - except KeyError: - if next_relative in template.subtemplates: - # we encountered this config key previously - raise ConfigTemplateError(( - u'{0} and {1} are recursively relative' - ).format(view.name, self.relative_to)) - else: - raise ConfigTemplateError(( - u'missing template for {0}, needed to expand {1}\'s' + - u'relative path' - ).format(self.relative_to, view.name)) - - next_template.subtemplates[next_relative] = rel_to_template - next_relative = rel_to_template.relative_to - - return view.parent.get(next_template)[self.relative_to] - - def value(self, view, template=None): - path, source = view.first() - if not isinstance(path, BASESTRING): - self.fail( - u'must be a filename, not {0}'.format(type(path).__name__), - view, - True - ) - path = os.path.expanduser(STRING(path)) - - if not os.path.isabs(path): - if self.cwd is not None: - # relative to the template's argument - path = os.path.join(self.cwd, path) - - elif self.relative_to is not None: - path = os.path.join( - self.resolve_relative_to(view, template), - path, - ) - - elif source.filename or self.in_app_dir: - # From defaults: relative to the app's directory. 
- path = os.path.join(view.root().config_dir(), path) - - return os.path.abspath(path) - - -class TypeTemplate(Template): - """A simple template that checks that a value is an instance of a - desired Python type. - """ - def __init__(self, typ, default=REQUIRED): - """Create a template that checks that the value is an instance - of `typ`. - """ - super(TypeTemplate, self).__init__(default) - self.typ = typ - - def convert(self, value, view): - if not isinstance(value, self.typ): - self.fail( - u'must be a {0}, not {1}'.format( - self.typ.__name__, - type(value).__name__, - ), - view, - True - ) - return value - - -class AttrDict(dict): - """A `dict` subclass that can be accessed via attributes (dot - notation) for convenience. - """ - def __getattr__(self, key): - if key in self: - return self[key] - else: - raise AttributeError(key) - - -def as_template(value): - """Convert a simple "shorthand" Python value to a `Template`. - """ - if isinstance(value, Template): - # If it's already a Template, pass it through. - return value - elif isinstance(value, collections.Mapping): - # Dictionaries work as templates. - return MappingTemplate(value) - elif value is int: - return Integer() - elif isinstance(value, int): - return Integer(value) - elif isinstance(value, type) and issubclass(value, BASESTRING): - return String() - elif isinstance(value, BASESTRING): - return String(value) - elif isinstance(value, set): - # convert to list to avoid hash related problems - return Choice(list(value)) - elif isinstance(value, list): - return OneOf(value) - elif value is float: - return Number() - elif value is None: - return Template() - elif value is dict: - return TypeTemplate(collections.Mapping) - elif value is list: - return TypeTemplate(collections.Sequence) - elif isinstance(value, type): - return TypeTemplate(value) - else: - raise ValueError(u'cannot convert to template: {0!r}'.format(value)) +# Cleanup namespace. 
+del key, value, warnings, confuse diff --git a/libs/common/beets/util/enumeration.py b/libs/common/beets/util/enumeration.py index 3e946718..e49f6fdd 100644 --- a/libs/common/beets/util/enumeration.py +++ b/libs/common/beets/util/enumeration.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function from enum import Enum diff --git a/libs/common/beets/util/functemplate.py b/libs/common/beets/util/functemplate.py index 0e13db4a..289a436d 100644 --- a/libs/common/beets/util/functemplate.py +++ b/libs/common/beets/util/functemplate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -27,30 +26,30 @@ This is sort of like a tiny, horrible degeneration of a real templating engine like Jinja2 or Mustache. """ -from __future__ import division, absolute_import, print_function import re import ast import dis import types import sys -import six +import functools -SYMBOL_DELIM = u'$' -FUNC_DELIM = u'%' -GROUP_OPEN = u'{' -GROUP_CLOSE = u'}' -ARG_SEP = u',' -ESCAPE_CHAR = u'$' +SYMBOL_DELIM = '$' +FUNC_DELIM = '%' +GROUP_OPEN = '{' +GROUP_CLOSE = '}' +ARG_SEP = ',' +ESCAPE_CHAR = '$' VARIABLE_PREFIX = '__var_' FUNCTION_PREFIX = '__func_' -class Environment(object): +class Environment: """Contains the values and functions to be substituted into a template. """ + def __init__(self, values, functions): self.values = values self.functions = functions @@ -72,15 +71,7 @@ def ex_literal(val): """An int, float, long, bool, string, or None literal with the given value. 
""" - if val is None: - return ast.Name('None', ast.Load()) - elif isinstance(val, six.integer_types): - return ast.Num(val) - elif isinstance(val, bool): - return ast.Name(bytes(val), ast.Load()) - elif isinstance(val, six.string_types): - return ast.Str(val) - raise TypeError(u'no literal for {0}'.format(type(val))) + return ast.Constant(val) def ex_varassign(name, expr): @@ -97,7 +88,7 @@ def ex_call(func, args): function may be an expression or the name of a function. Each argument may be an expression or a value to be used as a literal. """ - if isinstance(func, six.string_types): + if isinstance(func, str): func = ex_rvalue(func) args = list(args) @@ -105,10 +96,7 @@ def ex_call(func, args): if not isinstance(args[i], ast.expr): args[i] = ex_literal(args[i]) - if sys.version_info[:2] < (3, 5): - return ast.Call(func, args, [], None, None) - else: - return ast.Call(func, args, []) + return ast.Call(func, args, []) def compile_func(arg_names, statements, name='_the_func', debug=False): @@ -116,32 +104,30 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): the resulting Python function. If `debug`, then print out the bytecode of the compiled function. """ - if six.PY2: - func_def = ast.FunctionDef( - name=name.encode('utf-8'), - args=ast.arguments( - args=[ast.Name(n, ast.Param()) for n in arg_names], - vararg=None, - kwarg=None, - defaults=[ex_literal(None) for _ in arg_names], - ), - body=statements, - decorator_list=[], - ) - else: - func_def = ast.FunctionDef( - name=name, - args=ast.arguments( - args=[ast.arg(arg=n, annotation=None) for n in arg_names], - kwonlyargs=[], - kw_defaults=[], - defaults=[ex_literal(None) for _ in arg_names], - ), - body=statements, - decorator_list=[], - ) + args_fields = { + 'args': [ast.arg(arg=n, annotation=None) for n in arg_names], + 'kwonlyargs': [], + 'kw_defaults': [], + 'defaults': [ex_literal(None) for _ in arg_names], + } + if 'posonlyargs' in ast.arguments._fields: # Added in Python 3.8. 
+ args_fields['posonlyargs'] = [] + args = ast.arguments(**args_fields) + + func_def = ast.FunctionDef( + name=name, + args=args, + body=statements, + decorator_list=[], + ) + + # The ast.Module signature changed in 3.8 to accept a list of types to + # ignore. + if sys.version_info >= (3, 8): + mod = ast.Module([func_def], []) + else: + mod = ast.Module([func_def]) - mod = ast.Module([func_def]) ast.fix_missing_locations(mod) prog = compile(mod, '', 'exec') @@ -160,14 +146,15 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): # AST nodes for the template language. -class Symbol(object): +class Symbol: """A variable-substitution symbol in a template.""" + def __init__(self, ident, original): self.ident = ident self.original = original def __repr__(self): - return u'Symbol(%s)' % repr(self.ident) + return 'Symbol(%s)' % repr(self.ident) def evaluate(self, env): """Evaluate the symbol in the environment, returning a Unicode @@ -182,24 +169,22 @@ class Symbol(object): def translate(self): """Compile the variable lookup.""" - if six.PY2: - ident = self.ident.encode('utf-8') - else: - ident = self.ident + ident = self.ident expr = ex_rvalue(VARIABLE_PREFIX + ident) - return [expr], set([ident]), set() + return [expr], {ident}, set() -class Call(object): +class Call: """A function call in a template.""" + def __init__(self, ident, args, original): self.ident = ident self.args = args self.original = original def __repr__(self): - return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args), - repr(self.original)) + return 'Call({}, {}, {})'.format(repr(self.ident), repr(self.args), + repr(self.original)) def evaluate(self, env): """Evaluate the function call in the environment, returning a @@ -212,19 +197,15 @@ class Call(object): except Exception as exc: # Function raised exception! Maybe inlining the name of # the exception will help debug. 
- return u'<%s>' % six.text_type(exc) - return six.text_type(out) + return '<%s>' % str(exc) + return str(out) else: return self.original def translate(self): """Compile the function call.""" varnames = set() - if six.PY2: - ident = self.ident.encode('utf-8') - else: - ident = self.ident - funcnames = set([ident]) + funcnames = {self.ident} arg_exprs = [] for arg in self.args: @@ -235,32 +216,33 @@ class Call(object): # Create a subexpression that joins the result components of # the arguments. arg_exprs.append(ex_call( - ast.Attribute(ex_literal(u''), 'join', ast.Load()), + ast.Attribute(ex_literal(''), 'join', ast.Load()), [ex_call( 'map', [ - ex_rvalue(six.text_type.__name__), + ex_rvalue(str.__name__), ast.List(subexprs, ast.Load()), ] )], )) subexpr_call = ex_call( - FUNCTION_PREFIX + ident, + FUNCTION_PREFIX + self.ident, arg_exprs ) return [subexpr_call], varnames, funcnames -class Expression(object): +class Expression: """Top-level template construct: contains a list of text blobs, Symbols, and Calls. 
""" + def __init__(self, parts): self.parts = parts def __repr__(self): - return u'Expression(%s)' % (repr(self.parts)) + return 'Expression(%s)' % (repr(self.parts)) def evaluate(self, env): """Evaluate the entire expression in the environment, returning @@ -268,11 +250,11 @@ class Expression(object): """ out = [] for part in self.parts: - if isinstance(part, six.string_types): + if isinstance(part, str): out.append(part) else: out.append(part.evaluate(env)) - return u''.join(map(six.text_type, out)) + return ''.join(map(str, out)) def translate(self): """Compile the expression to a list of Python AST expressions, a @@ -282,7 +264,7 @@ class Expression(object): varnames = set() funcnames = set() for part in self.parts: - if isinstance(part, six.string_types): + if isinstance(part, str): expressions.append(ex_literal(part)) else: e, v, f = part.translate() @@ -298,7 +280,7 @@ class ParseError(Exception): pass -class Parser(object): +class Parser: """Parses a template expression string. Instantiate the class with the template source and call ``parse_expression``. The ``pos`` field will indicate the character after the expression finished and @@ -311,6 +293,7 @@ class Parser(object): replaced with a real, accepted parsing technique (PEG, parser generator, etc.). """ + def __init__(self, string, in_argument=False): """ Create a new parser. 
:param in_arguments: boolean that indicates the parser is to be @@ -326,7 +309,7 @@ class Parser(object): special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, ESCAPE_CHAR) special_char_re = re.compile(r'[%s]|\Z' % - u''.join(re.escape(c) for c in special_chars)) + ''.join(re.escape(c) for c in special_chars)) escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) terminator_chars = (GROUP_CLOSE,) @@ -343,7 +326,7 @@ class Parser(object): if self.in_argument: extra_special_chars = (ARG_SEP,) special_char_re = re.compile( - r'[%s]|\Z' % u''.join( + r'[%s]|\Z' % ''.join( re.escape(c) for c in self.special_chars + extra_special_chars ) @@ -387,7 +370,7 @@ class Parser(object): # Shift all characters collected so far into a single string. if text_parts: - self.parts.append(u''.join(text_parts)) + self.parts.append(''.join(text_parts)) text_parts = [] if char == SYMBOL_DELIM: @@ -409,7 +392,7 @@ class Parser(object): # If any parsed characters remain, shift them into a string. if text_parts: - self.parts.append(u''.join(text_parts)) + self.parts.append(''.join(text_parts)) def parse_symbol(self): """Parse a variable reference (like ``$foo`` or ``${foo}``) @@ -547,11 +530,27 @@ def _parse(template): return Expression(parts) -# External interface. +def cached(func): + """Like the `functools.lru_cache` decorator, but works (as a no-op) + on Python < 3.2. + """ + if hasattr(functools, 'lru_cache'): + return functools.lru_cache(maxsize=128)(func) + else: + # Do nothing when lru_cache is not available. + return func -class Template(object): + +@cached +def template(fmt): + return Template(fmt) + + +# External interface. +class Template: """A string template, including text, Symbols, and Calls. 
""" + def __init__(self, template): self.expr = _parse(template) self.original = template @@ -600,7 +599,7 @@ class Template(object): for funcname in funcnames: args[FUNCTION_PREFIX + funcname] = functions[funcname] parts = func(**args) - return u''.join(parts) + return ''.join(parts) return wrapper_func @@ -609,9 +608,9 @@ class Template(object): if __name__ == '__main__': import timeit - _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') + _tmpl = Template('foo $bar %baz{foozle $bar barzle} $bar') _vars = {'bar': 'qux'} - _funcs = {'baz': six.text_type.upper} + _funcs = {'baz': str.upper} interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', 'from __main__ import _tmpl, _vars, _funcs', number=10000) @@ -620,4 +619,4 @@ if __name__ == '__main__': 'from __main__ import _tmpl, _vars, _funcs', number=10000) print(comp_time) - print(u'Speedup:', interp_time / comp_time) + print('Speedup:', interp_time / comp_time) diff --git a/libs/common/beets/util/hidden.py b/libs/common/beets/util/hidden.py index ed97f2bf..881de1ac 100644 --- a/libs/common/beets/util/hidden.py +++ b/libs/common/beets/util/hidden.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,7 +13,6 @@ # included in all copies or substantial portions of the Software. """Simple library to work out if a file is hidden on different platforms.""" -from __future__ import division, absolute_import, print_function import os import stat diff --git a/libs/common/beets/util/pipeline.py b/libs/common/beets/util/pipeline.py index 39bc7152..d338cb51 100644 --- a/libs/common/beets/util/pipeline.py +++ b/libs/common/beets/util/pipeline.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -32,12 +31,10 @@ To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. 
""" -from __future__ import division, absolute_import, print_function -from six.moves import queue +import queue from threading import Thread, Lock import sys -import six BUBBLE = '__PIPELINE_BUBBLE__' POISON = '__PIPELINE_POISON__' @@ -91,6 +88,7 @@ class CountedQueue(queue.Queue): still feeding into it. The queue is poisoned when all threads are finished with the queue. """ + def __init__(self, maxsize=0): queue.Queue.__init__(self, maxsize) self.nthreads = 0 @@ -135,10 +133,11 @@ class CountedQueue(queue.Queue): _invalidate_queue(self, POISON, False) -class MultiMessage(object): +class MultiMessage: """A message yielded by a pipeline stage encapsulating multiple values to be sent to the next stage. """ + def __init__(self, messages): self.messages = messages @@ -210,8 +209,9 @@ def _allmsgs(obj): class PipelineThread(Thread): """Abstract base class for pipeline-stage threads.""" + def __init__(self, all_threads): - super(PipelineThread, self).__init__() + super().__init__() self.abort_lock = Lock() self.abort_flag = False self.all_threads = all_threads @@ -241,15 +241,13 @@ class FirstPipelineThread(PipelineThread): """The thread running the first stage in a parallel pipeline setup. The coroutine should just be a generator. """ + def __init__(self, coro, out_queue, all_threads): - super(FirstPipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.out_queue = out_queue self.out_queue.acquire() - self.abort_lock = Lock() - self.abort_flag = False - def run(self): try: while True: @@ -282,8 +280,9 @@ class MiddlePipelineThread(PipelineThread): """A thread running any stage in the pipeline except the first or last. 
""" + def __init__(self, coro, in_queue, out_queue, all_threads): - super(MiddlePipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.in_queue = in_queue self.out_queue = out_queue @@ -330,8 +329,9 @@ class LastPipelineThread(PipelineThread): """A thread running the last stage in a pipeline. The coroutine should yield nothing. """ + def __init__(self, coro, in_queue, all_threads): - super(LastPipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.in_queue = in_queue @@ -362,17 +362,18 @@ class LastPipelineThread(PipelineThread): return -class Pipeline(object): +class Pipeline: """Represents a staged pattern of work. Each stage in the pipeline is a coroutine that receives messages from the previous stage and yields messages to be sent to the next stage. """ + def __init__(self, stages): """Makes a new pipeline from a list of coroutines. There must be at least two stages. """ if len(stages) < 2: - raise ValueError(u'pipeline must have at least two stages') + raise ValueError('pipeline must have at least two stages') self.stages = [] for stage in stages: if isinstance(stage, (list, tuple)): @@ -442,7 +443,7 @@ class Pipeline(object): exc_info = thread.exc_info if exc_info: # Make the exception appear as it was raised originally. - six.reraise(exc_info[0], exc_info[1], exc_info[2]) + raise exc_info[1].with_traceback(exc_info[2]) def pull(self): """Yield elements from the end of the pipeline. Runs the stages @@ -469,6 +470,7 @@ class Pipeline(object): for msg in msgs: yield msg + # Smoke test. if __name__ == '__main__': import time @@ -477,14 +479,14 @@ if __name__ == '__main__': # in parallel. 
def produce(): for i in range(5): - print(u'generating %i' % i) + print('generating %i' % i) time.sleep(1) yield i def work(): num = yield while True: - print(u'processing %i' % num) + print('processing %i' % num) time.sleep(2) num = yield num * 2 @@ -492,7 +494,7 @@ if __name__ == '__main__': while True: num = yield time.sleep(1) - print(u'received %i' % num) + print('received %i' % num) ts_start = time.time() Pipeline([produce(), work(), consume()]).run_sequential() @@ -501,22 +503,22 @@ if __name__ == '__main__': ts_par = time.time() Pipeline([produce(), (work(), work()), consume()]).run_parallel() ts_end = time.time() - print(u'Sequential time:', ts_seq - ts_start) - print(u'Parallel time:', ts_par - ts_seq) - print(u'Multiply-parallel time:', ts_end - ts_par) + print('Sequential time:', ts_seq - ts_start) + print('Parallel time:', ts_par - ts_seq) + print('Multiply-parallel time:', ts_end - ts_par) print() # Test a pipeline that raises an exception. def exc_produce(): for i in range(10): - print(u'generating %i' % i) + print('generating %i' % i) time.sleep(1) yield i def exc_work(): num = yield while True: - print(u'processing %i' % num) + print('processing %i' % num) time.sleep(3) if num == 3: raise Exception() @@ -525,6 +527,6 @@ if __name__ == '__main__': def exc_consume(): while True: num = yield - print(u'received %i' % num) + print('received %i' % num) Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/libs/common/beets/vfs.py b/libs/common/beets/vfs.py index 7f9a049e..aef69650 100644 --- a/libs/common/beets/vfs.py +++ b/libs/common/beets/vfs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """A simple utility for constructing filesystem-like trees from beets libraries. 
""" -from __future__ import division, absolute_import, print_function from collections import namedtuple from beets import util diff --git a/libs/common/beetsplug/__init__.py b/libs/common/beetsplug/__init__.py index febeb66f..da248491 100644 --- a/libs/common/beetsplug/__init__.py +++ b/libs/common/beetsplug/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,7 +14,6 @@ """A namespace package for beets plugins.""" -from __future__ import division, absolute_import, print_function # Make this a namespace package. from pkgutil import extend_path diff --git a/libs/common/beetsplug/absubmit.py b/libs/common/beetsplug/absubmit.py index 0c288b9d..d1ea692f 100644 --- a/libs/common/beetsplug/absubmit.py +++ b/libs/common/beetsplug/absubmit.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Pieter Mulder. # @@ -16,7 +15,6 @@ """Calculate acoustic information and submit to AcousticBrainz. """ -from __future__ import division, absolute_import, print_function import errno import hashlib @@ -32,6 +30,9 @@ from beets import plugins from beets import util from beets import ui +# We use this field to check whether AcousticBrainz info is present. +PROBE_FIELD = 'mood_acoustic' + class ABSubmitError(Exception): """Raised when failing to analyse file with extractor.""" @@ -43,19 +44,23 @@ def call(args): Raise a AnalysisABSubmitError on failure. 
""" try: - return util.command_output(args) + return util.command_output(args).stdout except subprocess.CalledProcessError as e: raise ABSubmitError( - u'{0} exited with status {1}'.format(args[0], e.returncode) + '{} exited with status {}'.format(args[0], e.returncode) ) class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): def __init__(self): - super(AcousticBrainzSubmitPlugin, self).__init__() + super().__init__() - self.config.add({'extractor': u''}) + self.config.add({ + 'extractor': '', + 'force': False, + 'pretend': False + }) self.extractor = self.config['extractor'].as_str() if self.extractor: @@ -63,7 +68,7 @@ class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): # Expicit path to extractor if not os.path.isfile(self.extractor): raise ui.UserError( - u'Extractor command does not exist: {0}.'. + 'Extractor command does not exist: {0}.'. format(self.extractor) ) else: @@ -73,8 +78,8 @@ class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): call([self.extractor]) except OSError: raise ui.UserError( - u'No extractor command found: please install the ' - u'extractor binary from http://acousticbrainz.org/download' + 'No extractor command found: please install the extractor' + ' binary from https://acousticbrainz.org/download' ) except ABSubmitError: # Extractor found, will exit with an error if not called with @@ -96,7 +101,18 @@ class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): def commands(self): cmd = ui.Subcommand( 'absubmit', - help=u'calculate and submit AcousticBrainz analysis' + help='calculate and submit AcousticBrainz analysis' + ) + cmd.parser.add_option( + '-f', '--force', dest='force_refetch', + action='store_true', default=False, + help='re-download data when already present' + ) + cmd.parser.add_option( + '-p', '--pretend', dest='pretend_fetch', + action='store_true', default=False, + help='pretend to perform action, but show \ +only files which would be processed' ) cmd.func = self.command return [cmd] @@ -104,17 +120,30 @@ class 
AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): def command(self, lib, opts, args): # Get items from arguments items = lib.items(ui.decargs(args)) - for item in items: - analysis = self._get_analysis(item) - if analysis: - self._submit_data(item, analysis) + self.opts = opts + util.par_map(self.analyze_submit, items) + + def analyze_submit(self, item): + analysis = self._get_analysis(item) + if analysis: + self._submit_data(item, analysis) def _get_analysis(self, item): mbid = item['mb_trackid'] - # If file has no mbid skip it. + + # Avoid re-analyzing files that already have AB data. + if not self.opts.force_refetch and not self.config['force']: + if item.get(PROBE_FIELD): + return None + + # If file has no MBID, skip it. if not mbid: - self._log.info(u'Not analysing {}, missing ' - u'musicbrainz track id.', item) + self._log.info('Not analysing {}, missing ' + 'musicbrainz track id.', item) + return None + + if self.opts.pretend_fetch or self.config['pretend']: + self._log.info('pretend action - extract item: {}', item) return None # Temporary file to save extractor output to, extractor only works @@ -129,11 +158,11 @@ class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): call([self.extractor, util.syspath(item.path), filename]) except ABSubmitError as e: self._log.warning( - u'Failed to analyse {item} for AcousticBrainz: {error}', + 'Failed to analyse {item} for AcousticBrainz: {error}', item=item, error=e ) return None - with open(filename, 'rb') as tmp_file: + with open(filename) as tmp_file: analysis = json.load(tmp_file) # Add the hash to the output. 
analysis['metadata']['version']['essentia_build_sha'] = \ @@ -157,11 +186,11 @@ class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): try: message = response.json()['message'] except (ValueError, KeyError) as e: - message = u'unable to get error message: {}'.format(e) + message = f'unable to get error message: {e}' self._log.error( - u'Failed to submit AcousticBrainz analysis of {item}: ' - u'{message}).', item=item, message=message + 'Failed to submit AcousticBrainz analysis of {item}: ' + '{message}).', item=item, message=message ) else: - self._log.debug(u'Successfully submitted AcousticBrainz analysis ' - u'for {}.', item) + self._log.debug('Successfully submitted AcousticBrainz analysis ' + 'for {}.', item) diff --git a/libs/common/beetsplug/acousticbrainz.py b/libs/common/beetsplug/acousticbrainz.py index f4960c30..eabc5849 100644 --- a/libs/common/beetsplug/acousticbrainz.py +++ b/libs/common/beetsplug/acousticbrainz.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2015-2016, Ohm Patel. # @@ -15,12 +14,13 @@ """Fetch various AcousticBrainz metadata using MBID. 
""" -from __future__ import division, absolute_import, print_function + +from collections import defaultdict import requests -from collections import defaultdict from beets import plugins, ui +from beets.dbcore import types ACOUSTIC_BASE = "https://acousticbrainz.org/" LEVELS = ["/low-level", "/high-level"] @@ -72,6 +72,9 @@ ABSCHEME = { 'sad': 'mood_sad' } }, + 'moods_mirex': { + 'value': 'moods_mirex' + }, 'ismir04_rhythm': { 'value': 'rhythm' }, @@ -80,6 +83,9 @@ ABSCHEME = { 'tonal': 'tonal' } }, + 'timbre': { + 'value': 'timbre' + }, 'voice_instrumental': { 'value': 'voice_instrumental' }, @@ -104,8 +110,33 @@ ABSCHEME = { class AcousticPlugin(plugins.BeetsPlugin): + item_types = { + 'average_loudness': types.Float(6), + 'chords_changes_rate': types.Float(6), + 'chords_key': types.STRING, + 'chords_number_rate': types.Float(6), + 'chords_scale': types.STRING, + 'danceable': types.Float(6), + 'gender': types.STRING, + 'genre_rosamerica': types.STRING, + 'initial_key': types.STRING, + 'key_strength': types.Float(6), + 'mood_acoustic': types.Float(6), + 'mood_aggressive': types.Float(6), + 'mood_electronic': types.Float(6), + 'mood_happy': types.Float(6), + 'mood_party': types.Float(6), + 'mood_relaxed': types.Float(6), + 'mood_sad': types.Float(6), + 'moods_mirex': types.STRING, + 'rhythm': types.Float(6), + 'timbre': types.STRING, + 'tonal': types.Float(6), + 'voice_instrumental': types.STRING, + } + def __init__(self): - super(AcousticPlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, @@ -119,11 +150,11 @@ class AcousticPlugin(plugins.BeetsPlugin): def commands(self): cmd = ui.Subcommand('acousticbrainz', - help=u"fetch metadata from AcousticBrainz") + help="fetch metadata from AcousticBrainz") cmd.parser.add_option( - u'-f', u'--force', dest='force_refetch', + '-f', '--force', dest='force_refetch', action='store_true', default=False, - help=u're-download data when already present' + help='re-download data when already present' ) def 
func(lib, opts, args): @@ -142,22 +173,22 @@ class AcousticPlugin(plugins.BeetsPlugin): def _get_data(self, mbid): data = {} for url in _generate_urls(mbid): - self._log.debug(u'fetching URL: {}', url) + self._log.debug('fetching URL: {}', url) try: res = requests.get(url) except requests.RequestException as exc: - self._log.info(u'request error: {}', exc) + self._log.info('request error: {}', exc) return {} if res.status_code == 404: - self._log.info(u'recording ID {} not found', mbid) + self._log.info('recording ID {} not found', mbid) return {} try: data.update(res.json()) except ValueError: - self._log.debug(u'Invalid Response: {}', res.text) + self._log.debug('Invalid Response: {}', res.text) return {} return data @@ -172,28 +203,28 @@ class AcousticPlugin(plugins.BeetsPlugin): # representative field name to check for previously fetched # data. if not force: - mood_str = item.get('mood_acoustic', u'') + mood_str = item.get('mood_acoustic', '') if mood_str: - self._log.info(u'data already present for: {}', item) + self._log.info('data already present for: {}', item) continue # We can only fetch data for tracks with MBIDs. if not item.mb_trackid: continue - self._log.info(u'getting data for: {}', item) + self._log.info('getting data for: {}', item) data = self._get_data(item.mb_trackid) if data: for attr, val in self._map_data_to_scheme(data, ABSCHEME): if not tags or attr in tags: - self._log.debug(u'attribute {} of {} set to {}', + self._log.debug('attribute {} of {} set to {}', attr, item, val) setattr(item, attr, val) else: - self._log.debug(u'skipping attribute {} of {}' - u' (value {}) due to config', + self._log.debug('skipping attribute {} of {}' + ' (value {}) due to config', attr, item, val) @@ -255,10 +286,9 @@ class AcousticPlugin(plugins.BeetsPlugin): # The recursive traversal. 
composites = defaultdict(list) - for attr, val in self._data_to_scheme_child(data, - scheme, - composites): - yield attr, val + yield from self._data_to_scheme_child(data, + scheme, + composites) # When composites has been populated, yield the composite attributes # by joining their parts. @@ -278,10 +308,9 @@ class AcousticPlugin(plugins.BeetsPlugin): for k, v in subscheme.items(): if k in subdata: if type(v) == dict: - for attr, val in self._data_to_scheme_child(subdata[k], - v, - composites): - yield attr, val + yield from self._data_to_scheme_child(subdata[k], + v, + composites) elif type(v) == tuple: composite_attribute, part_number = v attribute_parts = composites[composite_attribute] @@ -292,10 +321,10 @@ class AcousticPlugin(plugins.BeetsPlugin): else: yield v, subdata[k] else: - self._log.warning(u'Acousticbrainz did not provide info' - u'about {}', k) - self._log.debug(u'Data {} could not be mapped to scheme {} ' - u'because key {} was not found', subdata, v, k) + self._log.warning('Acousticbrainz did not provide info' + 'about {}', k) + self._log.debug('Data {} could not be mapped to scheme {} ' + 'because key {} was not found', subdata, v, k) def _generate_urls(mbid): diff --git a/libs/common/beetsplug/albumtypes.py b/libs/common/beetsplug/albumtypes.py new file mode 100644 index 00000000..47f8dc64 --- /dev/null +++ b/libs/common/beetsplug/albumtypes.py @@ -0,0 +1,65 @@ +# This file is part of beets. +# Copyright 2021, Edgars Supe. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds an album template field for formatted album types.""" + + +from beets.autotag.mb import VARIOUS_ARTISTS_ID +from beets.library import Album +from beets.plugins import BeetsPlugin + + +class AlbumTypesPlugin(BeetsPlugin): + """Adds an album template field for formatted album types.""" + + def __init__(self): + """Init AlbumTypesPlugin.""" + super().__init__() + self.album_template_fields['atypes'] = self._atypes + self.config.add({ + 'types': [ + ('ep', 'EP'), + ('single', 'Single'), + ('soundtrack', 'OST'), + ('live', 'Live'), + ('compilation', 'Anthology'), + ('remix', 'Remix') + ], + 'ignore_va': ['compilation'], + 'bracket': '[]' + }) + + def _atypes(self, item: Album): + """Returns a formatted string based on album's types.""" + types = self.config['types'].as_pairs() + ignore_va = self.config['ignore_va'].as_str_seq() + bracket = self.config['bracket'].as_str() + + # Assign a left and right bracket or leave blank if argument is empty. 
+ if len(bracket) == 2: + bracket_l = bracket[0] + bracket_r = bracket[1] + else: + bracket_l = '' + bracket_r = '' + + res = '' + albumtypes = item.albumtypes.split('; ') + is_va = item.mb_albumartistid == VARIOUS_ARTISTS_ID + for type in types: + if type[0] in albumtypes and type[1]: + if not is_va or (type[0] not in ignore_va and is_va): + res += f'{bracket_l}{type[1]}{bracket_r}' + + return res diff --git a/libs/common/beetsplug/aura.py b/libs/common/beetsplug/aura.py new file mode 100644 index 00000000..f4ae5527 --- /dev/null +++ b/libs/common/beetsplug/aura.py @@ -0,0 +1,984 @@ +# This file is part of beets. +# Copyright 2020, Callum Brown. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""An AURA server using Flask.""" + + +from mimetypes import guess_type +import re +import os.path +from os.path import isfile, getsize + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, _open_library +from beets import config +from beets.util import py3_path +from beets.library import Item, Album +from beets.dbcore.query import ( + MatchQuery, + NotQuery, + RegexpQuery, + AndQuery, + FixedFieldSort, + SlowFieldSort, + MultipleSort, +) + +from flask import ( + Blueprint, + Flask, + current_app, + send_file, + make_response, + request, +) + + +# Constants + +# AURA server information +# TODO: Add version information +SERVER_INFO = { + "aura-version": "0", + "server": "beets-aura", + "server-version": "0.1", + "auth-required": False, + "features": ["albums", "artists", "images"], +} + +# Maps AURA Track attribute to beets Item attribute +TRACK_ATTR_MAP = { + # Required + "title": "title", + "artist": "artist", + # Optional + "album": "album", + "track": "track", # Track number on album + "tracktotal": "tracktotal", + "disc": "disc", + "disctotal": "disctotal", + "year": "year", + "month": "month", + "day": "day", + "bpm": "bpm", + "genre": "genre", + "recording-mbid": "mb_trackid", # beets trackid is MB recording + "track-mbid": "mb_releasetrackid", + "composer": "composer", + "albumartist": "albumartist", + "comments": "comments", + # Optional for Audio Metadata + # TODO: Support the mimetype attribute, format != mime type + # "mimetype": track.format, + "duration": "length", + "framerate": "samplerate", + # I don't think beets has a framecount field + # "framecount": ???, + "channels": "channels", + "bitrate": "bitrate", + "bitdepth": "bitdepth", + "size": "filesize", +} + +# Maps AURA Album attribute to beets Album attribute +ALBUM_ATTR_MAP = { + # Required + "title": "album", + "artist": "albumartist", + # Optional + "tracktotal": "albumtotal", + "disctotal": "disctotal", + "year": "year", + "month": "month", + "day": "day", + "genre": 
"genre", + "release-mbid": "mb_albumid", + "release-group-mbid": "mb_releasegroupid", +} + +# Maps AURA Artist attribute to beets Item field +# Artists are not first-class in beets, so information is extracted from +# beets Items. +ARTIST_ATTR_MAP = { + # Required + "name": "artist", + # Optional + "artist-mbid": "mb_artistid", +} + + +class AURADocument: + """Base class for building AURA documents.""" + + @staticmethod + def error(status, title, detail): + """Make a response for an error following the JSON:API spec. + + Args: + status: An HTTP status code string, e.g. "404 Not Found". + title: A short, human-readable summary of the problem. + detail: A human-readable explanation specific to this + occurrence of the problem. + """ + document = { + "errors": [{"status": status, "title": title, "detail": detail}] + } + return make_response(document, status) + + def translate_filters(self): + """Translate filters from request arguments to a beets Query.""" + # The format of each filter key in the request parameter is: + # filter[<attribute>]. This regex extracts <attribute>. + pattern = re.compile(r"filter\[(?P<attribute>[a-zA-Z0-9_-]+)\]") + queries = [] + for key, value in request.args.items(): + match = pattern.match(key) + if match: + # Extract attribute name from key + aura_attr = match.group("attribute") + # Get the beets version of the attribute name + beets_attr = self.attribute_map.get(aura_attr, aura_attr) + converter = self.get_attribute_converter(beets_attr) + value = converter(value) + # Add exact match query to list + # Use a slow query so it works with all fields + queries.append(MatchQuery(beets_attr, value, fast=False)) + # NOTE: AURA doesn't officially support multiple queries + return AndQuery(queries) + + def translate_sorts(self, sort_arg): + """Translate an AURA sort parameter into a beets Sort. + + Args: + sort_arg: The value of the 'sort' query parameter; a comma + separated list of fields to sort by, in order. + E.g. "-year,title".
+ """ + # Change HTTP query parameter to a list + aura_sorts = sort_arg.strip(",").split(",") + sorts = [] + for aura_attr in aura_sorts: + if aura_attr[0] == "-": + ascending = False + # Remove leading "-" + aura_attr = aura_attr[1:] + else: + # JSON:API default + ascending = True + # Get the beets version of the attribute name + beets_attr = self.attribute_map.get(aura_attr, aura_attr) + # Use slow sort so it works with all fields (inc. computed) + sorts.append(SlowFieldSort(beets_attr, ascending=ascending)) + return MultipleSort(sorts) + + def paginate(self, collection): + """Get a page of the collection and the URL to the next page. + + Args: + collection: The raw data from which resource objects can be + built. Could be an sqlite3.Cursor object (tracks and + albums) or a list of strings (artists). + """ + # Pages start from zero + page = request.args.get("page", 0, int) + # Use page limit defined in config by default. + default_limit = config["aura"]["page_limit"].get(int) + limit = request.args.get("limit", default_limit, int) + # start = offset of first item to return + start = page * limit + # end = offset of last item + 1 + end = start + limit + if end > len(collection): + end = len(collection) + next_url = None + else: + # Not the last page so work out links.next url + if not request.args: + # No existing arguments, so current page is 0 + next_url = request.url + "?page=1" + elif not request.args.get("page", None): + # No existing page argument, so add one to the end + next_url = request.url + "&page=1" + else: + # Increment page token by 1 + next_url = request.url.replace( + f"page={page}", "page={}".format(page + 1) + ) + # Get only the items in the page range + data = [self.resource_object(collection[i]) for i in range(start, end)] + return data, next_url + + def get_included(self, data, include_str): + """Build a list of resource objects for inclusion. + + Args: + data: An array of dicts in the form of resource objects. 
+ include_str: A comma separated list of resource types to + include. E.g. "tracks,images". + """ + # Change HTTP query parameter to a list + to_include = include_str.strip(",").split(",") + # Build a list of unique type and id combinations + # For each resource object in the primary data, iterate over it's + # relationships. If a relationship matches one of the types + # requested for inclusion (e.g. "albums") then add each type-id pair + # under the "data" key to unique_identifiers, checking first that + # it has not already been added. This ensures that no resources are + # included more than once. + unique_identifiers = [] + for res_obj in data: + for rel_name, rel_obj in res_obj["relationships"].items(): + if rel_name in to_include: + # NOTE: Assumes relationship is to-many + for identifier in rel_obj["data"]: + if identifier not in unique_identifiers: + unique_identifiers.append(identifier) + # TODO: I think this could be improved + included = [] + for identifier in unique_identifiers: + res_type = identifier["type"] + if res_type == "track": + track_id = int(identifier["id"]) + track = current_app.config["lib"].get_item(track_id) + included.append(TrackDocument.resource_object(track)) + elif res_type == "album": + album_id = int(identifier["id"]) + album = current_app.config["lib"].get_album(album_id) + included.append(AlbumDocument.resource_object(album)) + elif res_type == "artist": + artist_id = identifier["id"] + included.append(ArtistDocument.resource_object(artist_id)) + elif res_type == "image": + image_id = identifier["id"] + included.append(ImageDocument.resource_object(image_id)) + else: + raise ValueError(f"Invalid resource type: {res_type}") + return included + + def all_resources(self): + """Build document for /tracks, /albums or /artists.""" + query = self.translate_filters() + sort_arg = request.args.get("sort", None) + if sort_arg: + sort = self.translate_sorts(sort_arg) + # For each sort field add a query which ensures all results + # have a 
non-empty, non-zero value for that field. + for s in sort.sorts: + query.subqueries.append( + NotQuery( + # Match empty fields (^$) or zero fields, (^0$) + RegexpQuery(s.field, "(^$|^0$)", fast=False) + ) + ) + else: + sort = None + # Get information from the library + collection = self.get_collection(query=query, sort=sort) + # Convert info to AURA form and paginate it + data, next_url = self.paginate(collection) + document = {"data": data} + # If there are more pages then provide a way to access them + if next_url: + document["links"] = {"next": next_url} + # Include related resources for each element in "data" + include_str = request.args.get("include", None) + if include_str: + document["included"] = self.get_included(data, include_str) + return document + + def single_resource_document(self, resource_object): + """Build document for a specific requested resource. + + Args: + resource_object: A dictionary in the form of a JSON:API + resource object. + """ + document = {"data": resource_object} + include_str = request.args.get("include", None) + if include_str: + # [document["data"]] is because arg needs to be list + document["included"] = self.get_included( + [document["data"]], include_str + ) + return document + + +class TrackDocument(AURADocument): + """Class for building documents for /tracks endpoints.""" + + attribute_map = TRACK_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get Item objects from the library. + + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. + """ + return current_app.config["lib"].items(query, sort) + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "title". + """ + # filesize is a special field (read from disk not db?) 
+ if beets_attr == "filesize": + converter = int + else: + try: + # Look for field in list of Item fields + # and get python type of database type. + # See beets.library.Item and beets.dbcore.types + converter = Item._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(track): + """Construct a JSON:API resource object from a beets Item. + + Args: + track: A beets Item object. + """ + attributes = {} + # Use aura => beets attribute map, e.g. size => filesize + for aura_attr, beets_attr in TRACK_ATTR_MAP.items(): + a = getattr(track, beets_attr) + # Only set attribute if it's not None, 0, "", etc. + # NOTE: This could result in required attributes not being set + if a: + attributes[aura_attr] = a + + # JSON:API one-to-many relationship to parent album + relationships = { + "artists": {"data": [{"type": "artist", "id": track.artist}]} + } + # Only add album relationship if not singleton + if not track.singleton: + relationships["albums"] = { + "data": [{"type": "album", "id": str(track.album_id)}] + } + + return { + "type": "track", + "id": str(track.id), + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, track_id): + """Get track from the library and build a document. + + Args: + track_id: The beets id of the track (integer). + """ + track = current_app.config["lib"].get_item(track_id) + if not track: + return self.error( + "404 Not Found", + "No track with the requested id.", + "There is no track with an id of {} in the library.".format( + track_id + ), + ) + return self.single_resource_document(self.resource_object(track)) + + +class AlbumDocument(AURADocument): + """Class for building documents for /albums endpoints.""" + + attribute_map = ALBUM_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get Album objects from the library. 
+ + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. + """ + return current_app.config["lib"].albums(query, sort) + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "title". + """ + try: + # Look for field in list of Album fields + # and get python type of database type. + # See beets.library.Album and beets.dbcore.types + converter = Album._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(album): + """Construct a JSON:API resource object from a beets Album. + + Args: + album: A beets Album object. + """ + attributes = {} + # Use aura => beets attribute name map + for aura_attr, beets_attr in ALBUM_ATTR_MAP.items(): + a = getattr(album, beets_attr) + # Only set attribute if it's not None, 0, "", etc. + # NOTE: This could mean required attributes are not set + if a: + attributes[aura_attr] = a + + # Get beets Item objects for all tracks in the album sorted by + # track number. Sorting is not required but it's nice. 
+ query = MatchQuery("album_id", album.id) + sort = FixedFieldSort("track", ascending=True) + tracks = current_app.config["lib"].items(query, sort) + # JSON:API one-to-many relationship to tracks on the album + relationships = { + "tracks": { + "data": [{"type": "track", "id": str(t.id)} for t in tracks] + } + } + # Add images relationship if album has associated images + if album.artpath: + path = py3_path(album.artpath) + filename = path.split("/")[-1] + image_id = f"album-{album.id}-{filename}" + relationships["images"] = { + "data": [{"type": "image", "id": image_id}] + } + # Add artist relationship if artist name is same on tracks + # Tracks are used to define artists so don't albumartist + # Check for all tracks in case some have featured artists + if album.albumartist in [t.artist for t in tracks]: + relationships["artists"] = { + "data": [{"type": "artist", "id": album.albumartist}] + } + + return { + "type": "album", + "id": str(album.id), + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, album_id): + """Get album from the library and build a document. + + Args: + album_id: The beets id of the album (integer). + """ + album = current_app.config["lib"].get_album(album_id) + if not album: + return self.error( + "404 Not Found", + "No album with the requested id.", + "There is no album with an id of {} in the library.".format( + album_id + ), + ) + return self.single_resource_document(self.resource_object(album)) + + +class ArtistDocument(AURADocument): + """Class for building documents for /artists endpoints.""" + + attribute_map = ARTIST_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get a list of artist names from the library. + + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. 
+ """ + # Gets only tracks with matching artist information + tracks = current_app.config["lib"].items(query, sort) + collection = [] + for track in tracks: + # Do not add duplicates + if track.artist not in collection: + collection.append(track.artist) + return collection + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "artist". + """ + try: + # Look for field in list of Item fields + # and get python type of database type. + # See beets.library.Item and beets.dbcore.types + converter = Item._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(artist_id): + """Construct a JSON:API resource object for the given artist. + + Args: + artist_id: A string which is the artist's name. + """ + # Get tracks where artist field exactly matches artist_id + query = MatchQuery("artist", artist_id) + tracks = current_app.config["lib"].items(query) + if not tracks: + return None + + # Get artist information from the first track + # NOTE: It could be that the first track doesn't have a + # MusicBrainz id but later tracks do, which isn't ideal. + attributes = {} + # Use aura => beets attribute map, e.g. artist => name + for aura_attr, beets_attr in ARTIST_ATTR_MAP.items(): + a = getattr(tracks[0], beets_attr) + # Only set attribute if it's not None, 0, "", etc. 
+ # NOTE: This could mean required attributes are not set + if a: + attributes[aura_attr] = a + + relationships = { + "tracks": { + "data": [{"type": "track", "id": str(t.id)} for t in tracks] + } + } + album_query = MatchQuery("albumartist", artist_id) + albums = current_app.config["lib"].albums(query=album_query) + if len(albums) != 0: + relationships["albums"] = { + "data": [{"type": "album", "id": str(a.id)} for a in albums] + } + + return { + "type": "artist", + "id": artist_id, + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, artist_id): + """Get info for the requested artist and build a document. + + Args: + artist_id: A string which is the artist's name. + """ + artist_resource = self.resource_object(artist_id) + if not artist_resource: + return self.error( + "404 Not Found", + "No artist with the requested id.", + "There is no artist with an id of {} in the library.".format( + artist_id + ), + ) + return self.single_resource_document(artist_resource) + + +def safe_filename(fn): + """Check whether a string is a simple (non-path) filename. + + For example, `foo.txt` is safe because it is a "plain" filename. But + `foo/bar.txt` and `../foo.txt` and `.` are all non-safe because they + can traverse to other directories other than the current one. + """ + # Rule out any directories. + if os.path.basename(fn) != fn: + return False + + # In single names, rule out Unix directory traversal names. + if fn in ('.', '..'): + return False + + return True + + +class ImageDocument(AURADocument): + """Class for building documents for /images/(id) endpoints.""" + + @staticmethod + def get_image_path(image_id): + """Works out the full path to the image with the given id. + + Returns None if there is no such image. + + Args: + image_id: A string in the form + "--". 
+ """ + # Split image_id into its constituent parts + id_split = image_id.split("-") + if len(id_split) < 3: + # image_id is not in the required format + return None + parent_type = id_split[0] + parent_id = id_split[1] + img_filename = "-".join(id_split[2:]) + if not safe_filename(img_filename): + return None + + # Get the path to the directory parent's images are in + if parent_type == "album": + album = current_app.config["lib"].get_album(int(parent_id)) + if not album or not album.artpath: + return None + # Cut the filename off of artpath + # This is in preparation for supporting images in the same + # directory that are not tracked by beets. + artpath = py3_path(album.artpath) + dir_path = "/".join(artpath.split("/")[:-1]) + else: + # Images for other resource types are not supported + return None + + img_path = os.path.join(dir_path, img_filename) + # Check the image actually exists + if isfile(img_path): + return img_path + else: + return None + + @staticmethod + def resource_object(image_id): + """Construct a JSON:API resource object for the given image. + + Args: + image_id: A string in the form + "--". + """ + # Could be called as a static method, so can't use + # self.get_image_path() + image_path = ImageDocument.get_image_path(image_id) + if not image_path: + return None + + attributes = { + "role": "cover", + "mimetype": guess_type(image_path)[0], + "size": getsize(image_path), + } + try: + from PIL import Image + except ImportError: + pass + else: + im = Image.open(image_path) + attributes["width"] = im.width + attributes["height"] = im.height + + relationships = {} + # Split id into [parent_type, parent_id, filename] + id_split = image_id.split("-") + relationships[id_split[0] + "s"] = { + "data": [{"type": id_split[0], "id": id_split[1]}] + } + + return { + "id": image_id, + "type": "image", + # Remove attributes that are None, 0, "", etc. 
+ "attributes": {k: v for k, v in attributes.items() if v}, + "relationships": relationships, + } + + def single_resource(self, image_id): + """Get info for the requested image and build a document. + + Args: + image_id: A string in the form + "--". + """ + image_resource = self.resource_object(image_id) + if not image_resource: + return self.error( + "404 Not Found", + "No image with the requested id.", + "There is no image with an id of {} in the library.".format( + image_id + ), + ) + return self.single_resource_document(image_resource) + + +# Initialise flask blueprint +aura_bp = Blueprint("aura_bp", __name__) + + +@aura_bp.route("/server") +def server_info(): + """Respond with info about the server.""" + return {"data": {"type": "server", "id": "0", "attributes": SERVER_INFO}} + + +# Track endpoints + + +@aura_bp.route("/tracks") +def all_tracks(): + """Respond with a list of all tracks and related information.""" + doc = TrackDocument() + return doc.all_resources() + + +@aura_bp.route("/tracks/") +def single_track(track_id): + """Respond with info about the specified track. + + Args: + track_id: The id of the track provided in the URL (integer). + """ + doc = TrackDocument() + return doc.single_resource(track_id) + + +@aura_bp.route("/tracks//audio") +def audio_file(track_id): + """Supply an audio file for the specified track. + + Args: + track_id: The id of the track provided in the URL (integer). 
+ """ + track = current_app.config["lib"].get_item(track_id) + if not track: + return AURADocument.error( + "404 Not Found", + "No track with the requested id.", + "There is no track with an id of {} in the library.".format( + track_id + ), + ) + + path = py3_path(track.path) + if not isfile(path): + return AURADocument.error( + "404 Not Found", + "No audio file for the requested track.", + ( + "There is no audio file for track {} at the expected location" + ).format(track_id), + ) + + file_mimetype = guess_type(path)[0] + if not file_mimetype: + return AURADocument.error( + "500 Internal Server Error", + "Requested audio file has an unknown mimetype.", + ( + "The audio file for track {} has an unknown mimetype. " + "Its file extension is {}." + ).format(track_id, path.split(".")[-1]), + ) + + # Check that the Accept header contains the file's mimetype + # Takes into account */* and audio/* + # Adding support for the bitrate parameter would require some effort so I + # left it out. This means the client could be sent an error even if the + # audio doesn't need transcoding. + if not request.accept_mimetypes.best_match([file_mimetype]): + return AURADocument.error( + "406 Not Acceptable", + "Unsupported MIME type or bitrate parameter in Accept header.", + ( + "The audio file for track {} is only available as {} and " + "bitrate parameters are not supported." + ).format(track_id, file_mimetype), + ) + + return send_file( + path, + mimetype=file_mimetype, + # Handles filename in Content-Disposition header + as_attachment=True, + # Tries to upgrade the stream to support range requests + conditional=True, + ) + + +# Album endpoints + + +@aura_bp.route("/albums") +def all_albums(): + """Respond with a list of all albums and related information.""" + doc = AlbumDocument() + return doc.all_resources() + + +@aura_bp.route("/albums/") +def single_album(album_id): + """Respond with info about the specified album. 
+ + Args: + album_id: The id of the album provided in the URL (integer). + """ + doc = AlbumDocument() + return doc.single_resource(album_id) + + +# Artist endpoints +# Artist ids are their names + + +@aura_bp.route("/artists") +def all_artists(): + """Respond with a list of all artists and related information.""" + doc = ArtistDocument() + return doc.all_resources() + + +# Using the path converter allows slashes in artist_id +@aura_bp.route("/artists/") +def single_artist(artist_id): + """Respond with info about the specified artist. + + Args: + artist_id: The id of the artist provided in the URL. A string + which is the artist's name. + """ + doc = ArtistDocument() + return doc.single_resource(artist_id) + + +# Image endpoints +# Image ids are in the form -- +# For example: album-13-cover.jpg + + +@aura_bp.route("/images/") +def single_image(image_id): + """Respond with info about the specified image. + + Args: + image_id: The id of the image provided in the URL. A string in + the form "--". + """ + doc = ImageDocument() + return doc.single_resource(image_id) + + +@aura_bp.route("/images//file") +def image_file(image_id): + """Supply an image file for the specified image. + + Args: + image_id: The id of the image provided in the URL. A string in + the form "--". 
+ """ + img_path = ImageDocument.get_image_path(image_id) + if not img_path: + return AURADocument.error( + "404 Not Found", + "No image with the requested id.", + "There is no image with an id of {} in the library".format( + image_id + ), + ) + return send_file(img_path) + + +# WSGI app + + +def create_app(): + """An application factory for use by a WSGI server.""" + config["aura"].add( + { + "host": "127.0.0.1", + "port": 8337, + "cors": [], + "cors_supports_credentials": False, + "page_limit": 500, + } + ) + + app = Flask(__name__) + # Register AURA blueprint view functions under a URL prefix + app.register_blueprint(aura_bp, url_prefix="/aura") + # AURA specifies mimetype MUST be this + app.config["JSONIFY_MIMETYPE"] = "application/vnd.api+json" + # Disable auto-sorting of JSON keys + app.config["JSON_SORT_KEYS"] = False + # Provide a way to access the beets library + # The normal method of using the Library and config provided in the + # command function is not used because create_app() could be called + # by an external WSGI server. + # NOTE: this uses a 'private' function from beets.ui.__init__ + app.config["lib"] = _open_library(config) + + # Enable CORS if required + cors = config["aura"]["cors"].as_str_seq(list) + if cors: + from flask_cors import CORS + + # "Accept" is the only header clients use + app.config["CORS_ALLOW_HEADERS"] = "Accept" + app.config["CORS_RESOURCES"] = {r"/aura/*": {"origins": cors}} + app.config["CORS_SUPPORTS_CREDENTIALS"] = config["aura"][ + "cors_supports_credentials" + ].get(bool) + CORS(app) + + return app + + +# Beets Plugin Hook + + +class AURAPlugin(BeetsPlugin): + """The BeetsPlugin subclass for the AURA server plugin.""" + + def __init__(self): + """Add configuration options for the AURA plugin.""" + super().__init__() + + def commands(self): + """Add subcommand used to run the AURA server.""" + + def run_aura(lib, opts, args): + """Run the application using Flask's built in-server. 
+ + Args: + lib: A beets Library object (not used). + opts: Command line options. An optparse.Values object. + args: The list of arguments to process (not used). + """ + app = create_app() + # Start the built-in server (not intended for production) + app.run( + host=self.config["host"].get(str), + port=self.config["port"].get(int), + debug=opts.debug, + threaded=True, + ) + + run_aura_cmd = Subcommand("aura", help="run an AURA server") + run_aura_cmd.parser.add_option( + "-d", + "--debug", + action="store_true", + default=False, + help="use Flask debug mode", + ) + run_aura_cmd.func = run_aura + return [run_aura_cmd] diff --git a/libs/common/beetsplug/badfiles.py b/libs/common/beetsplug/badfiles.py index 62c6d8af..ec465895 100644 --- a/libs/common/beetsplug/badfiles.py +++ b/libs/common/beetsplug/badfiles.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, François-Xavier Thomas. # @@ -16,18 +15,19 @@ """Use command-line tools to check for audio file corruption. 
""" -from __future__ import division, absolute_import, print_function -from beets.plugins import BeetsPlugin -from beets.ui import Subcommand -from beets.util import displayable_path, confit -from beets import ui from subprocess import check_output, CalledProcessError, list2cmdline, STDOUT + import shlex import os import errno import sys -import six +import confuse +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand +from beets.util import displayable_path, par_map +from beets import ui +from beets import importer class CheckerCommandException(Exception): @@ -48,8 +48,17 @@ class CheckerCommandException(Exception): class BadFiles(BeetsPlugin): + def __init__(self): + super().__init__() + self.verbose = False + + self.register_listener('import_task_start', + self.on_import_task_start) + self.register_listener('import_task_before_choice', + self.on_import_task_before_choice) + def run_command(self, cmd): - self._log.debug(u"running command: {}", + self._log.debug("running command: {}", displayable_path(list2cmdline(cmd))) try: output = check_output(cmd, stderr=STDOUT) @@ -61,7 +70,7 @@ class BadFiles(BeetsPlugin): status = e.returncode except OSError as e: raise CheckerCommandException(cmd, e) - output = output.decode(sys.getfilesystemencoding()) + output = output.decode(sys.getdefaultencoding(), 'replace') return status, errors, [line for line in output.split("\n") if line] def check_mp3val(self, path): @@ -85,68 +94,122 @@ class BadFiles(BeetsPlugin): ext = ext.lower() try: command = self.config['commands'].get(dict).get(ext) - except confit.NotFoundError: + except confuse.NotFoundError: command = None if command: return self.check_custom(command) - elif ext == "mp3": + if ext == "mp3": return self.check_mp3val - elif ext == "flac": + if ext == "flac": return self.check_flac - def check_bad(self, lib, opts, args): - for item in lib.items(ui.decargs(args)): + def check_item(self, item): + # First, check whether the path exists. 
If not, the user + # should probably run `beet update` to cleanup your library. + dpath = displayable_path(item.path) + self._log.debug("checking path: {}", dpath) + if not os.path.exists(item.path): + ui.print_("{}: file does not exist".format( + ui.colorize('text_error', dpath))) - # First, check whether the path exists. If not, the user - # should probably run `beet update` to cleanup your library. - dpath = displayable_path(item.path) - self._log.debug(u"checking path: {}", dpath) - if not os.path.exists(item.path): - ui.print_(u"{}: file does not exist".format( - ui.colorize('text_error', dpath))) + # Run the checker against the file if one is found + ext = os.path.splitext(item.path)[1][1:].decode('utf8', 'ignore') + checker = self.get_checker(ext) + if not checker: + self._log.error("no checker specified in the config for {}", + ext) + return [] + path = item.path + if not isinstance(path, str): + path = item.path.decode(sys.getfilesystemencoding()) + try: + status, errors, output = checker(path) + except CheckerCommandException as e: + if e.errno == errno.ENOENT: + self._log.error( + "command not found: {} when validating file: {}", + e.checker, + e.path + ) + else: + self._log.error("error invoking {}: {}", e.checker, e.msg) + return [] - # Run the checker against the file if one is found - ext = os.path.splitext(item.path)[1][1:].decode('utf8', 'ignore') - checker = self.get_checker(ext) - if not checker: - self._log.error(u"no checker specified in the config for {}", - ext) - continue - path = item.path - if not isinstance(path, six.text_type): - path = item.path.decode(sys.getfilesystemencoding()) - try: - status, errors, output = checker(path) - except CheckerCommandException as e: - if e.errno == errno.ENOENT: - self._log.error( - u"command not found: {} when validating file: {}", - e.checker, - e.path - ) - else: - self._log.error(u"error invoking {}: {}", e.checker, e.msg) - continue - if status > 0: - ui.print_(u"{}: checker exited with status {}" 
- .format(ui.colorize('text_error', dpath), status)) - for line in output: - ui.print_(u" {}".format(displayable_path(line))) - elif errors > 0: - ui.print_(u"{}: checker found {} errors or warnings" - .format(ui.colorize('text_warning', dpath), errors)) - for line in output: - ui.print_(u" {}".format(displayable_path(line))) - elif opts.verbose: - ui.print_(u"{}: ok".format(ui.colorize('text_success', dpath))) + error_lines = [] + + if status > 0: + error_lines.append( + "{}: checker exited with status {}" + .format(ui.colorize('text_error', dpath), status)) + for line in output: + error_lines.append(f" {line}") + + elif errors > 0: + error_lines.append( + "{}: checker found {} errors or warnings" + .format(ui.colorize('text_warning', dpath), errors)) + for line in output: + error_lines.append(f" {line}") + elif self.verbose: + error_lines.append( + "{}: ok".format(ui.colorize('text_success', dpath))) + + return error_lines + + def on_import_task_start(self, task, session): + if not self.config['check_on_import'].get(False): + return + + checks_failed = [] + + for item in task.items: + error_lines = self.check_item(item) + if error_lines: + checks_failed.append(error_lines) + + if checks_failed: + task._badfiles_checks_failed = checks_failed + + def on_import_task_before_choice(self, task, session): + if hasattr(task, '_badfiles_checks_failed'): + ui.print_('{} one or more files failed checks:' + .format(ui.colorize('text_warning', 'BAD'))) + for error in task._badfiles_checks_failed: + for error_line in error: + ui.print_(error_line) + + ui.print_() + ui.print_('What would you like to do?') + + sel = ui.input_options(['aBort', 'skip', 'continue']) + + if sel == 's': + return importer.action.SKIP + elif sel == 'c': + return None + elif sel == 'b': + raise importer.ImportAbort() + else: + raise Exception(f'Unexpected selection: {sel}') + + def command(self, lib, opts, args): + # Get items from arguments + items = lib.items(ui.decargs(args)) + self.verbose = 
opts.verbose + + def check_and_print(item): + for error_line in self.check_item(item): + ui.print_(error_line) + + par_map(check_and_print, items) def commands(self): bad_command = Subcommand('bad', - help=u'check for corrupt or missing files') + help='check for corrupt or missing files') bad_command.parser.add_option( - u'-v', u'--verbose', + '-v', '--verbose', action='store_true', default=False, dest='verbose', - help=u'view results for both the bad and uncorrupted files' + help='view results for both the bad and uncorrupted files' ) - bad_command.func = self.check_bad + bad_command.func = self.command return [bad_command] diff --git a/libs/common/beetsplug/bareasc.py b/libs/common/beetsplug/bareasc.py new file mode 100644 index 00000000..21836936 --- /dev/null +++ b/libs/common/beetsplug/bareasc.py @@ -0,0 +1,82 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# Copyright 2021, Graham R. Cobb. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and ascociated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# This module is adapted from Fuzzy in accordance to the licence of +# that module + +"""Provides a bare-ASCII matching query.""" + + +from beets import ui +from beets.ui import print_, decargs +from beets.plugins import BeetsPlugin +from beets.dbcore.query import StringFieldQuery +from unidecode import unidecode + + +class BareascQuery(StringFieldQuery): + """Compare items using bare ASCII, without accents etc.""" + @classmethod + def string_match(cls, pattern, val): + """Convert both pattern and string to plain ASCII before matching. + + If pattern is all lower case, also convert string to lower case so + match is also case insensitive + """ + # smartcase + if pattern.islower(): + val = val.lower() + pattern = unidecode(pattern) + val = unidecode(val) + return pattern in val + + +class BareascPlugin(BeetsPlugin): + """Plugin to provide bare-ASCII option for beets matching.""" + def __init__(self): + """Default prefix for selecting bare-ASCII matching is #.""" + super().__init__() + self.config.add({ + 'prefix': '#', + }) + + def queries(self): + """Register bare-ASCII matching.""" + prefix = self.config['prefix'].as_str() + return {prefix: BareascQuery} + + def commands(self): + """Add bareasc command as unidecode version of 'list'.""" + cmd = ui.Subcommand('bareasc', + help='unidecode version of beet list command') + cmd.parser.usage += "\n" \ + 'Example: %prog -f \'$album: $title\' artist:beatles' + cmd.parser.add_all_common_options() + cmd.func = self.unidecode_list + return [cmd] + + def unidecode_list(self, lib, opts, args): + """Emulate normal 'list' command but with unidecode output.""" + query = decargs(args) + album = opts.album + # Copied from commands.py - list_items + if album: + for album in lib.albums(query): + bare = unidecode(str(album)) + print_(bare) + else: + for item in lib.items(query): + bare = unidecode(str(item)) + print_(bare) diff --git a/libs/common/beetsplug/beatport.py b/libs/common/beetsplug/beatport.py index fc412d99..133441d7 
100644 --- a/libs/common/beetsplug/beatport.py +++ b/libs/common/beetsplug/beatport.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,11 +14,9 @@ """Adds Beatport release and track search support to the autotagger """ -from __future__ import division, absolute_import, print_function import json import re -import six from datetime import datetime, timedelta from requests_oauthlib import OAuth1Session @@ -28,35 +25,35 @@ from requests_oauthlib.oauth1_session import (TokenRequestDenied, TokenMissing, import beets import beets.ui -from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance -from beets.plugins import BeetsPlugin -from beets.util import confit +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance +import confuse AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing) -USER_AGENT = u'beets/{0} +http://beets.io/'.format(beets.__version__) +USER_AGENT = f'beets/{beets.__version__} +https://beets.io/' class BeatportAPIError(Exception): pass -class BeatportObject(object): +class BeatportObject: def __init__(self, data): self.beatport_id = data['id'] - self.name = six.text_type(data['name']) + self.name = str(data['name']) if 'releaseDate' in data: self.release_date = datetime.strptime(data['releaseDate'], '%Y-%m-%d') if 'artists' in data: - self.artists = [(x['id'], six.text_type(x['name'])) + self.artists = [(x['id'], str(x['name'])) for x in data['artists']] if 'genres' in data: - self.genres = [six.text_type(x['name']) + self.genres = [str(x['name']) for x in data['genres']] -class BeatportClient(object): +class BeatportClient: _api_base = 'https://oauth-api.beatport.com' def __init__(self, c_key, c_secret, auth_key=None, auth_secret=None): @@ -109,7 +106,7 @@ class BeatportClient(object): :rtype: (unicode, unicode) tuple """ self.api.parse_authorization_response( - "http://beets.io/auth?" 
+ auth_data) + "https://beets.io/auth?" + auth_data) access_data = self.api.fetch_access_token( self._make_url('/identity/1/oauth/access-token')) return access_data['oauth_token'], access_data['oauth_token_secret'] @@ -131,7 +128,7 @@ class BeatportClient(object): """ response = self._get('catalog/3/search', query=query, perPage=5, - facets=['fieldType:{0}'.format(release_type)]) + facets=[f'fieldType:{release_type}']) for item in response: if release_type == 'release': if details: @@ -150,9 +147,11 @@ class BeatportClient(object): :rtype: :py:class:`BeatportRelease` """ response = self._get('/catalog/3/releases', id=beatport_id) - release = BeatportRelease(response[0]) - release.tracks = self.get_release_tracks(beatport_id) - return release + if response: + release = BeatportRelease(response[0]) + release.tracks = self.get_release_tracks(beatport_id) + return release + return None def get_release_tracks(self, beatport_id): """ Get all tracks for a given release. @@ -191,7 +190,7 @@ class BeatportClient(object): response = self.api.get(self._make_url(endpoint), params=kwargs) except Exception as e: raise BeatportAPIError("Error connecting to Beatport API: {}" - .format(e.message)) + .format(e)) if not response: raise BeatportAPIError( "Error {0.status_code} for '{0.request.path_url}" @@ -199,21 +198,20 @@ class BeatportClient(object): return response.json()['results'] -@six.python_2_unicode_compatible class BeatportRelease(BeatportObject): def __str__(self): if len(self.artists) < 4: artist_str = ", ".join(x[1] for x in self.artists) else: artist_str = "Various Artists" - return u"".format( + return "".format( artist_str, self.name, self.catalog_number, ) def __repr__(self): - return six.text_type(self).encode('utf-8') + return str(self).encode('utf-8') def __init__(self, data): BeatportObject.__init__(self, data) @@ -224,26 +222,26 @@ class BeatportRelease(BeatportObject): if 'category' in data: self.category = data['category'] if 'slug' in data: - self.url = 
"http://beatport.com/release/{0}/{1}".format( + self.url = "https://beatport.com/release/{}/{}".format( data['slug'], data['id']) + self.genre = data.get('genre') -@six.python_2_unicode_compatible class BeatportTrack(BeatportObject): def __str__(self): artist_str = ", ".join(x[1] for x in self.artists) - return (u"" + return ("" .format(artist_str, self.name, self.mix_name)) def __repr__(self): - return six.text_type(self).encode('utf-8') + return str(self).encode('utf-8') def __init__(self, data): BeatportObject.__init__(self, data) if 'title' in data: - self.title = six.text_type(data['title']) + self.title = str(data['title']) if 'mixName' in data: - self.mix_name = six.text_type(data['mixName']) + self.mix_name = str(data['mixName']) self.length = timedelta(milliseconds=data.get('lengthMs', 0) or 0) if not self.length: try: @@ -252,14 +250,26 @@ class BeatportTrack(BeatportObject): except ValueError: pass if 'slug' in data: - self.url = "http://beatport.com/track/{0}/{1}".format(data['slug'], - data['id']) + self.url = "https://beatport.com/track/{}/{}" \ + .format(data['slug'], data['id']) self.track_number = data.get('trackNumber') + self.bpm = data.get('bpm') + self.initial_key = str( + (data.get('key') or {}).get('shortName') + ) + + # Use 'subgenre' and if not present, 'genre' as a fallback. + if data.get('subGenres'): + self.genre = str(data['subGenres'][0].get('name')) + elif data.get('genres'): + self.genre = str(data['genres'][0].get('name')) class BeatportPlugin(BeetsPlugin): + data_source = 'Beatport' + def __init__(self): - super(BeatportPlugin, self).__init__() + super().__init__() self.config.add({ 'apikey': '57713c3906af6f5def151b33601389176b37b429', 'apisecret': 'b3fe08c93c80aefd749fe871a16cd2bb32e2b954', @@ -279,7 +289,7 @@ class BeatportPlugin(BeetsPlugin): try: with open(self._tokenfile()) as f: tokendata = json.load(f) - except IOError: + except OSError: # No token yet. Generate one. 
token, secret = self.authenticate(c_key, c_secret) else: @@ -294,22 +304,22 @@ class BeatportPlugin(BeetsPlugin): try: url = auth_client.get_authorize_url() except AUTH_ERRORS as e: - self._log.debug(u'authentication error: {0}', e) - raise beets.ui.UserError(u'communication with Beatport failed') + self._log.debug('authentication error: {0}', e) + raise beets.ui.UserError('communication with Beatport failed') - beets.ui.print_(u"To authenticate with Beatport, visit:") + beets.ui.print_("To authenticate with Beatport, visit:") beets.ui.print_(url) # Ask for the verifier data and validate it. - data = beets.ui.input_(u"Enter the string displayed in your browser:") + data = beets.ui.input_("Enter the string displayed in your browser:") try: token, secret = auth_client.get_access_token(data) except AUTH_ERRORS as e: - self._log.debug(u'authentication error: {0}', e) - raise beets.ui.UserError(u'Beatport token request failed') + self._log.debug('authentication error: {0}', e) + raise beets.ui.UserError('Beatport token request failed') # Save the token for later use. - self._log.debug(u'Beatport token {0}, secret {1}', token, secret) + self._log.debug('Beatport token {0}, secret {1}', token, secret) with open(self._tokenfile(), 'w') as f: json.dump({'token': token, 'secret': secret}, f) @@ -318,74 +328,80 @@ class BeatportPlugin(BeetsPlugin): def _tokenfile(self): """Get the path to the JSON file for storing the OAuth token. """ - return self.config['tokenfile'].get(confit.Filename(in_app_dir=True)) + return self.config['tokenfile'].get(confuse.Filename(in_app_dir=True)) def album_distance(self, items, album_info, mapping): - """Returns the beatport source weight and the maximum source weight + """Returns the Beatport source weight and the maximum source weight for albums. 
""" - dist = Distance() - if album_info.data_source == 'Beatport': - dist.add('source', self.config['source_weight'].as_number()) - return dist + return get_distance( + data_source=self.data_source, + info=album_info, + config=self.config + ) def track_distance(self, item, track_info): - """Returns the beatport source weight and the maximum source weight + """Returns the Beatport source weight and the maximum source weight for individual tracks. """ - dist = Distance() - if track_info.data_source == 'Beatport': - dist.add('source', self.config['source_weight'].as_number()) - return dist + return get_distance( + data_source=self.data_source, + info=track_info, + config=self.config + ) - def candidates(self, items, artist, release, va_likely): + def candidates(self, items, artist, release, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for beatport search results matching release and artist (if not various). """ if va_likely: query = release else: - query = '%s %s' % (artist, release) + query = f'{artist} {release}' try: return self._get_releases(query) except BeatportAPIError as e: - self._log.debug(u'API Error: {0} (query: {1})', e, query) + self._log.debug('API Error: {0} (query: {1})', e, query) return [] def item_candidates(self, item, artist, title): """Returns a list of TrackInfo objects for beatport search results matching title and artist. """ - query = '%s %s' % (artist, title) + query = f'{artist} {title}' try: return self._get_tracks(query) except BeatportAPIError as e: - self._log.debug(u'API Error: {0} (query: {1})', e, query) + self._log.debug('API Error: {0} (query: {1})', e, query) return [] def album_for_id(self, release_id): """Fetches a release by its Beatport ID and returns an AlbumInfo object - or None if the release is not found. + or None if the query is not a valid ID or release is not found. 
""" - self._log.debug(u'Searching for release {0}', release_id) + self._log.debug('Searching for release {0}', release_id) match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id) if not match: + self._log.debug('Not a valid Beatport release ID.') return None release = self.client.get_release(match.group(2)) - album = self._get_album_info(release) - return album + if release: + return self._get_album_info(release) + return None def track_for_id(self, track_id): """Fetches a track by its Beatport ID and returns a TrackInfo object - or None if the track is not found. + or None if the track is not a valid Beatport ID or track is not found. """ - self._log.debug(u'Searching for track {0}', track_id) + self._log.debug('Searching for track {0}', track_id) match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id) if not match: + self._log.debug('Not a valid Beatport track ID.') return None bp_track = self.client.get_track(match.group(2)) - track = self._get_track_info(bp_track) - return track + if bp_track is not None: + return self._get_track_info(bp_track) + return None def _get_releases(self, query): """Returns a list of AlbumInfo objects for a beatport search query. 
@@ -408,7 +424,7 @@ class BeatportPlugin(BeetsPlugin): va = len(release.artists) > 3 artist, artist_id = self._get_artist(release.artists) if va: - artist = u"Various Artists" + artist = "Various Artists" tracks = [self._get_track_info(x) for x in release.tracks] return AlbumInfo(album=release.name, album_id=release.beatport_id, @@ -418,40 +434,33 @@ class BeatportPlugin(BeetsPlugin): month=release.release_date.month, day=release.release_date.day, label=release.label_name, - catalognum=release.catalog_number, media=u'Digital', - data_source=u'Beatport', data_url=release.url) + catalognum=release.catalog_number, media='Digital', + data_source=self.data_source, data_url=release.url, + genre=release.genre) def _get_track_info(self, track): """Returns a TrackInfo object for a Beatport Track object. """ title = track.name - if track.mix_name != u"Original Mix": - title += u" ({0})".format(track.mix_name) + if track.mix_name != "Original Mix": + title += f" ({track.mix_name})" artist, artist_id = self._get_artist(track.artists) length = track.length.total_seconds() return TrackInfo(title=title, track_id=track.beatport_id, artist=artist, artist_id=artist_id, length=length, index=track.track_number, medium_index=track.track_number, - data_source=u'Beatport', data_url=track.url) + data_source=self.data_source, data_url=track.url, + bpm=track.bpm, initial_key=track.initial_key, + genre=track.genre) def _get_artist(self, artists): """Returns an artist string (all artists) and an artist_id (the main artist) for a list of Beatport release or track artists. """ - artist_id = None - bits = [] - for artist in artists: - if not artist_id: - artist_id = artist[0] - name = artist[1] - # Strip disambiguation number. - name = re.sub(r' \(\d+\)$', '', name) - # Move articles to the front. 
- name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I) - bits.append(name) - artist = ', '.join(bits).replace(' ,', ',') or None - return artist, artist_id + return MetadataSourcePlugin.get_artist( + artists=artists, id_key=0, name_key=1 + ) def _get_tracks(self, query): """Returns a list of TrackInfo objects for a Beatport query. diff --git a/libs/common/beetsplug/bench.py b/libs/common/beetsplug/bench.py index 41f575cd..6dffbdda 100644 --- a/libs/common/beetsplug/bench.py +++ b/libs/common/beetsplug/bench.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """Some simple performance benchmarks for beets. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui diff --git a/libs/common/beetsplug/bpd/__init__.py b/libs/common/beetsplug/bpd/__init__.py index 1049f0c7..07198b1b 100644 --- a/libs/common/beetsplug/bpd/__init__.py +++ b/libs/common/beetsplug/bpd/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -18,37 +17,38 @@ Beets library. Attempts to implement a compatible protocol to allow use of the wide range of MPD clients. 
""" -from __future__ import division, absolute_import, print_function import re +import sys from string import Template import traceback import random import time +import math +import inspect +import socket import beets from beets.plugins import BeetsPlugin import beets.ui -from beets import logging from beets import vfs from beets.util import bluelet from beets.library import Item from beets import dbcore -from beets.mediafile import MediaFile -import six +from mediafile import MediaFile -PROTOCOL_VERSION = '0.13.0' +PROTOCOL_VERSION = '0.16.0' BUFSIZE = 1024 -HELLO = u'OK MPD %s' % PROTOCOL_VERSION -CLIST_BEGIN = u'command_list_begin' -CLIST_VERBOSE_BEGIN = u'command_list_ok_begin' -CLIST_END = u'command_list_end' -RESP_OK = u'OK' -RESP_CLIST_VERBOSE = u'list_OK' -RESP_ERR = u'ACK' +HELLO = 'OK MPD %s' % PROTOCOL_VERSION +CLIST_BEGIN = 'command_list_begin' +CLIST_VERBOSE_BEGIN = 'command_list_ok_begin' +CLIST_END = 'command_list_end' +RESP_OK = 'OK' +RESP_CLIST_VERBOSE = 'list_OK' +RESP_ERR = 'ACK' -NEWLINE = u"\n" +NEWLINE = "\n" ERROR_NOT_LIST = 1 ERROR_ARG = 2 @@ -68,14 +68,18 @@ VOLUME_MAX = 100 SAFE_COMMANDS = ( # Commands that are available when unauthenticated. - u'close', u'commands', u'notcommands', u'password', u'ping', + 'close', 'commands', 'notcommands', 'password', 'ping', ) -ITEM_KEYS_WRITABLE = set(MediaFile.fields()).intersection(Item._fields.keys()) +# List of subsystems/events used by the `idle` command. +SUBSYSTEMS = [ + 'update', 'player', 'mixer', 'options', 'playlist', 'database', + # Related to unsupported commands: + 'stored_playlist', 'output', 'subscription', 'sticker', 'message', + 'partition', +] -# Loggers. -log = logging.getLogger('beets.bpd') -global_log = logging.getLogger('beets') +ITEM_KEYS_WRITABLE = set(MediaFile.fields()).intersection(Item._fields.keys()) # Gstreamer import error. 
@@ -95,7 +99,7 @@ class BPDError(Exception): self.cmd_name = cmd_name self.index = index - template = Template(u'$resp [$code@$index] {$cmd_name} $message') + template = Template('$resp [$code@$index] {$cmd_name} $message') def response(self): """Returns a string to be used as the response code for the @@ -124,9 +128,9 @@ def make_bpd_error(s_code, s_message): pass return NewBPDError -ArgumentTypeError = make_bpd_error(ERROR_ARG, u'invalid type for argument') -ArgumentIndexError = make_bpd_error(ERROR_ARG, u'argument out of range') -ArgumentNotFoundError = make_bpd_error(ERROR_NO_EXIST, u'argument not found') +ArgumentTypeError = make_bpd_error(ERROR_ARG, 'invalid type for argument') +ArgumentIndexError = make_bpd_error(ERROR_ARG, 'argument out of range') +ArgumentNotFoundError = make_bpd_error(ERROR_NO_EXIST, 'argument not found') def cast_arg(t, val): @@ -150,10 +154,20 @@ class BPDClose(Exception): should be closed. """ + +class BPDIdle(Exception): + """Raised by a command to indicate the client wants to enter the idle state + and should be notified when a relevant event happens. + """ + def __init__(self, subsystems): + super().__init__() + self.subsystems = set(subsystems) + + # Generic server infrastructure, implementing the basic protocol. -class BaseServer(object): +class BaseServer: """A MPD-compatible music player server. The functions with the `cmd_` prefix are invoked in response to @@ -166,34 +180,87 @@ class BaseServer(object): This is a generic superclass and doesn't support many commands. """ - def __init__(self, host, port, password): + def __init__(self, host, port, password, ctrl_port, log, ctrl_host=None): """Create a new server bound to address `host` and listening on port `port`. If `password` is given, it is required to do anything significant on the server. + A separate control socket is established listening to `ctrl_host` on + port `ctrl_port` which is used to forward notifications from the player + and can be sent debug commands (e.g. 
using netcat). """ self.host, self.port, self.password = host, port, password + self.ctrl_host, self.ctrl_port = ctrl_host or host, ctrl_port + self.ctrl_sock = None + self._log = log # Default server values. self.random = False self.repeat = False + self.consume = False + self.single = False self.volume = VOLUME_MAX self.crossfade = 0 + self.mixrampdb = 0.0 + self.mixrampdelay = float('nan') + self.replay_gain_mode = 'off' self.playlist = [] self.playlist_version = 0 self.current_index = -1 self.paused = False self.error = None + # Current connections + self.connections = set() + # Object for random numbers generation self.random_obj = random.Random() + def connect(self, conn): + """A new client has connected. + """ + self.connections.add(conn) + + def disconnect(self, conn): + """Client has disconnected; clean up residual state. + """ + self.connections.remove(conn) + def run(self): """Block and start listening for connections from clients. An interrupt (^C) closes the server. """ self.startup_time = time.time() - bluelet.run(bluelet.server(self.host, self.port, - Connection.handler(self))) + + def start(): + yield bluelet.spawn( + bluelet.server(self.ctrl_host, self.ctrl_port, + ControlConnection.handler(self))) + yield bluelet.server(self.host, self.port, + MPDConnection.handler(self)) + bluelet.run(start()) + + def dispatch_events(self): + """If any clients have idle events ready, send them. + """ + # We need a copy of `self.connections` here since clients might + # disconnect once we try and send to them, changing `self.connections`. + for conn in list(self.connections): + yield bluelet.spawn(conn.send_notifications()) + + def _ctrl_send(self, message): + """Send some data over the control socket. + If it's our first time, open the socket. The message should be a + string without a terminal newline. 
+ """ + if not self.ctrl_sock: + self.ctrl_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.ctrl_sock.connect((self.ctrl_host, self.ctrl_port)) + self.ctrl_sock.sendall((message + '\n').encode('utf-8')) + + def _send_event(self, event): + """Notify subscribed connections of an event.""" + for conn in self.connections: + conn.notify(event) def _item_info(self, item): """An abstract method that should response lines containing a @@ -231,10 +298,10 @@ class BaseServer(object): def _succ_idx(self): """Returns the index for the next song to play. - It also considers random and repeat flags. + It also considers random, single and repeat flags. No boundaries are checked. """ - if self.repeat: + if self.repeat and self.single: return self.current_index if self.random: return self._random_idx() @@ -245,7 +312,7 @@ class BaseServer(object): It also considers random and repeat flags. No boundaries are checked. """ - if self.repeat: + if self.repeat and self.single: return self.current_index if self.random: return self._random_idx() @@ -255,9 +322,17 @@ class BaseServer(object): """Succeeds.""" pass + def cmd_idle(self, conn, *subsystems): + subsystems = subsystems or SUBSYSTEMS + for system in subsystems: + if system not in SUBSYSTEMS: + raise BPDError(ERROR_ARG, + f'Unrecognised idle event: {system}') + raise BPDIdle(subsystems) # put the connection into idle mode + def cmd_kill(self, conn): """Exits the server process.""" - exit(0) + sys.exit(0) def cmd_close(self, conn): """Closes the connection.""" @@ -269,20 +344,20 @@ class BaseServer(object): conn.authenticated = True else: conn.authenticated = False - raise BPDError(ERROR_PASSWORD, u'incorrect password') + raise BPDError(ERROR_PASSWORD, 'incorrect password') def cmd_commands(self, conn): """Lists the commands available to the user.""" if self.password and not conn.authenticated: # Not authenticated. Show limited list of commands. 
for cmd in SAFE_COMMANDS: - yield u'command: ' + cmd + yield 'command: ' + cmd else: # Authenticated. Show all commands. for func in dir(self): if func.startswith('cmd_'): - yield u'command: ' + func[4:] + yield 'command: ' + func[4:] def cmd_notcommands(self, conn): """Lists all unavailable commands.""" @@ -292,7 +367,7 @@ class BaseServer(object): if func.startswith('cmd_'): cmd = func[4:] if cmd not in SAFE_COMMANDS: - yield u'command: ' + cmd + yield 'command: ' + cmd else: # Authenticated. No commands are unavailable. @@ -306,29 +381,43 @@ class BaseServer(object): playlist, playlistlength, and xfade. """ yield ( - u'volume: ' + six.text_type(self.volume), - u'repeat: ' + six.text_type(int(self.repeat)), - u'random: ' + six.text_type(int(self.random)), - u'playlist: ' + six.text_type(self.playlist_version), - u'playlistlength: ' + six.text_type(len(self.playlist)), - u'xfade: ' + six.text_type(self.crossfade), + 'repeat: ' + str(int(self.repeat)), + 'random: ' + str(int(self.random)), + 'consume: ' + str(int(self.consume)), + 'single: ' + str(int(self.single)), + 'playlist: ' + str(self.playlist_version), + 'playlistlength: ' + str(len(self.playlist)), + 'mixrampdb: ' + str(self.mixrampdb), ) + if self.volume > 0: + yield 'volume: ' + str(self.volume) + + if not math.isnan(self.mixrampdelay): + yield 'mixrampdelay: ' + str(self.mixrampdelay) + if self.crossfade > 0: + yield 'xfade: ' + str(self.crossfade) + if self.current_index == -1: - state = u'stop' + state = 'stop' elif self.paused: - state = u'pause' + state = 'pause' else: - state = u'play' - yield u'state: ' + state + state = 'play' + yield 'state: ' + state if self.current_index != -1: # i.e., paused or playing current_id = self._item_id(self.playlist[self.current_index]) - yield u'song: ' + six.text_type(self.current_index) - yield u'songid: ' + six.text_type(current_id) + yield 'song: ' + str(self.current_index) + yield 'songid: ' + str(current_id) + if len(self.playlist) > self.current_index + 1: + 
# If there's a next song, report its index too. + next_id = self._item_id(self.playlist[self.current_index + 1]) + yield 'nextsong: ' + str(self.current_index + 1) + yield 'nextsongid: ' + str(next_id) if self.error: - yield u'error: ' + self.error + yield 'error: ' + self.error def cmd_clearerror(self, conn): """Removes the persistent error state of the server. This @@ -340,29 +429,82 @@ class BaseServer(object): def cmd_random(self, conn, state): """Set or unset random (shuffle) mode.""" self.random = cast_arg('intbool', state) + self._send_event('options') def cmd_repeat(self, conn, state): """Set or unset repeat mode.""" self.repeat = cast_arg('intbool', state) + self._send_event('options') + + def cmd_consume(self, conn, state): + """Set or unset consume mode.""" + self.consume = cast_arg('intbool', state) + self._send_event('options') + + def cmd_single(self, conn, state): + """Set or unset single mode.""" + # TODO support oneshot in addition to 0 and 1 [MPD 0.20] + self.single = cast_arg('intbool', state) + self._send_event('options') def cmd_setvol(self, conn, vol): """Set the player's volume level (0-100).""" vol = cast_arg(int, vol) if vol < VOLUME_MIN or vol > VOLUME_MAX: - raise BPDError(ERROR_ARG, u'volume out of range') + raise BPDError(ERROR_ARG, 'volume out of range') self.volume = vol + self._send_event('mixer') + + def cmd_volume(self, conn, vol_delta): + """Deprecated command to change the volume by a relative amount.""" + vol_delta = cast_arg(int, vol_delta) + return self.cmd_setvol(conn, self.volume + vol_delta) def cmd_crossfade(self, conn, crossfade): """Set the number of seconds of crossfading.""" crossfade = cast_arg(int, crossfade) if crossfade < 0: - raise BPDError(ERROR_ARG, u'crossfade time must be nonnegative') + raise BPDError(ERROR_ARG, 'crossfade time must be nonnegative') + self._log.warning('crossfade is not implemented in bpd') + self.crossfade = crossfade + self._send_event('options') + + def cmd_mixrampdb(self, conn, db): + 
"""Set the mixramp normalised max volume in dB.""" + db = cast_arg(float, db) + if db > 0: + raise BPDError(ERROR_ARG, 'mixrampdb time must be negative') + self._log.warning('mixramp is not implemented in bpd') + self.mixrampdb = db + self._send_event('options') + + def cmd_mixrampdelay(self, conn, delay): + """Set the mixramp delay in seconds.""" + delay = cast_arg(float, delay) + if delay < 0: + raise BPDError(ERROR_ARG, 'mixrampdelay time must be nonnegative') + self._log.warning('mixramp is not implemented in bpd') + self.mixrampdelay = delay + self._send_event('options') + + def cmd_replay_gain_mode(self, conn, mode): + """Set the replay gain mode.""" + if mode not in ['off', 'track', 'album', 'auto']: + raise BPDError(ERROR_ARG, 'Unrecognised replay gain mode') + self._log.warning('replay gain is not implemented in bpd') + self.replay_gain_mode = mode + self._send_event('options') + + def cmd_replay_gain_status(self, conn): + """Get the replaygain mode.""" + yield 'replay_gain_mode: ' + str(self.replay_gain_mode) def cmd_clear(self, conn): """Clear the playlist.""" self.playlist = [] self.playlist_version += 1 self.cmd_stop(conn) + self._send_event('playlist') def cmd_delete(self, conn, index): """Remove the song at index from the playlist.""" @@ -378,6 +520,7 @@ class BaseServer(object): elif index < self.current_index: # Deleted before playing. # Shift playing index down. 
self.current_index -= 1 + self._send_event('playlist') def cmd_deleteid(self, conn, track_id): self.cmd_delete(conn, self._id_to_index(track_id)) @@ -401,6 +544,7 @@ class BaseServer(object): self.current_index += 1 self.playlist_version += 1 + self._send_event('playlist') def cmd_moveid(self, conn, idx_from, idx_to): idx_from = self._id_to_index(idx_from) @@ -426,6 +570,7 @@ class BaseServer(object): self.current_index = i self.playlist_version += 1 + self._send_event('playlist') def cmd_swapid(self, conn, i_id, j_id): i = self._id_to_index(i_id) @@ -436,23 +581,27 @@ class BaseServer(object): """Indicates supported URL schemes. None by default.""" pass - def cmd_playlistinfo(self, conn, index=-1): + def cmd_playlistinfo(self, conn, index=None): """Gives metadata information about the entire playlist or a single track, given by its index. """ - index = cast_arg(int, index) - if index == -1: + if index is None: for track in self.playlist: yield self._item_info(track) else: + indices = self._parse_range(index, accept_single_number=True) try: - track = self.playlist[index] + tracks = [self.playlist[i] for i in indices] except IndexError: raise ArgumentIndexError() - yield self._item_info(track) + for track in tracks: + yield self._item_info(track) - def cmd_playlistid(self, conn, track_id=-1): - return self.cmd_playlistinfo(conn, self._id_to_index(track_id)) + def cmd_playlistid(self, conn, track_id=None): + if track_id is not None: + track_id = cast_arg(int, track_id) + track_id = self._id_to_index(track_id) + return self.cmd_playlistinfo(conn, track_id) def cmd_plchanges(self, conn, version): """Sends playlist changes since the given version. @@ -469,8 +618,8 @@ class BaseServer(object): Also a dummy implementation. 
""" for idx, track in enumerate(self.playlist): - yield u'cpos: ' + six.text_type(idx) - yield u'Id: ' + six.text_type(track.id) + yield 'cpos: ' + str(idx) + yield 'Id: ' + str(track.id) def cmd_currentsong(self, conn): """Sends information about the currently-playing song. @@ -481,20 +630,38 @@ class BaseServer(object): def cmd_next(self, conn): """Advance to the next song in the playlist.""" + old_index = self.current_index self.current_index = self._succ_idx() + if self.consume: + # TODO how does consume interact with single+repeat? + self.playlist.pop(old_index) + if self.current_index > old_index: + self.current_index -= 1 + self.playlist_version += 1 + self._send_event("playlist") if self.current_index >= len(self.playlist): - # Fallen off the end. Just move to stopped state. + # Fallen off the end. Move to stopped state or loop. + if self.repeat: + self.current_index = -1 + return self.cmd_play(conn) + return self.cmd_stop(conn) + elif self.single and not self.repeat: return self.cmd_stop(conn) else: return self.cmd_play(conn) def cmd_previous(self, conn): """Step back to the last song.""" + old_index = self.current_index self.current_index = self._prev_idx() + if self.consume: + self.playlist.pop(old_index) if self.current_index < 0: - return self.cmd_stop(conn) - else: - return self.cmd_play(conn) + if self.repeat: + self.current_index = len(self.playlist) - 1 + else: + self.current_index = 0 + return self.cmd_play(conn) def cmd_pause(self, conn, state=None): """Set the pause state playback.""" @@ -502,12 +669,13 @@ class BaseServer(object): self.paused = not self.paused # Toggle. 
else: self.paused = cast_arg('intbool', state) + self._send_event('player') def cmd_play(self, conn, index=-1): """Begin playback, possibly at a specified playlist index.""" index = cast_arg(int, index) - if index < -1 or index > len(self.playlist): + if index < -1 or index >= len(self.playlist): raise ArgumentIndexError() if index == -1: # No index specified: start where we are. @@ -521,6 +689,7 @@ class BaseServer(object): self.current_index = index self.paused = False + self._send_event('player') def cmd_playid(self, conn, track_id=0): track_id = cast_arg(int, track_id) @@ -534,6 +703,7 @@ class BaseServer(object): """Stop playback.""" self.current_index = -1 self.paused = False + self._send_event('player') def cmd_seek(self, conn, index, pos): """Seek to a specified point in a specified song.""" @@ -541,28 +711,40 @@ class BaseServer(object): if index < 0 or index >= len(self.playlist): raise ArgumentIndexError() self.current_index = index + self._send_event('player') def cmd_seekid(self, conn, track_id, pos): index = self._id_to_index(track_id) return self.cmd_seek(conn, index, pos) - def cmd_profile(self, conn): - """Memory profiling for debugging.""" - from guppy import hpy - heap = hpy().heap() - print(heap) + # Additions to the MPD protocol. + + def cmd_crash_TypeError(self, conn): # noqa: N802 + """Deliberately trigger a TypeError for testing purposes. + We want to test that the server properly responds with ERROR_SYSTEM + without crashing, and that this is not treated as ERROR_ARG (since it + is caused by a programming error, not a protocol error). + """ + 'a' + 2 -class Connection(object): - """A connection between a client and the server. Handles input and - output from and to the client. +class Connection: + """A connection between a client and the server. """ def __init__(self, server, sock): """Create a new connection for the accepted socket `client`. 
""" self.server = server self.sock = sock - self.authenticated = False + self.address = '{}:{}'.format(*sock.sock.getpeername()) + + def debug(self, message, kind=' '): + """Log a debug message about this connection. + """ + self.server._log.debug('{}[{}]: {}', kind, self.address, message) + + def run(self): + pass def send(self, lines): """Send lines, which which is either a single string or an @@ -570,14 +752,35 @@ class Connection(object): added after every string. Returns a Bluelet event that sends the data. """ - if isinstance(lines, six.string_types): + if isinstance(lines, str): lines = [lines] out = NEWLINE.join(lines) + NEWLINE - log.debug('{}', out[:-1]) # Don't log trailing newline. - if isinstance(out, six.text_type): + for l in out.split(NEWLINE)[:-1]: + self.debug(l, kind='>') + if isinstance(out, str): out = out.encode('utf-8') return self.sock.sendall(out) + @classmethod + def handler(cls, server): + def _handle(sock): + """Creates a new `Connection` and runs it. + """ + return cls(server, sock).run() + return _handle + + +class MPDConnection(Connection): + """A connection that receives commands from an MPD-compatible client. + """ + def __init__(self, server, sock): + """Create a new connection for the accepted socket `client`. + """ + super().__init__(server, sock) + self.authenticated = False + self.notifications = set() + self.idle_subscriptions = set() + def do_command(self, command): """A coroutine that runs the given command and sends an appropriate response.""" @@ -590,28 +793,75 @@ class Connection(object): # Send success code. yield self.send(RESP_OK) + def disconnect(self): + """The connection has closed for any reason. + """ + self.server.disconnect(self) + self.debug('disconnected', kind='*') + + def notify(self, event): + """Queue up an event for sending to this client. + """ + self.notifications.add(event) + + def send_notifications(self, force_close_idle=False): + """Send the client any queued events now. 
+ """ + pending = self.notifications.intersection(self.idle_subscriptions) + try: + for event in pending: + yield self.send(f'changed: {event}') + if pending or force_close_idle: + self.idle_subscriptions = set() + self.notifications = self.notifications.difference(pending) + yield self.send(RESP_OK) + except bluelet.SocketClosedError: + self.disconnect() # Client disappeared. + def run(self): """Send a greeting to the client and begin processing commands as they arrive. """ + self.debug('connected', kind='*') + self.server.connect(self) yield self.send(HELLO) clist = None # Initially, no command list is being constructed. while True: line = yield self.sock.readline() if not line: + self.disconnect() # Client disappeared. break line = line.strip() if not line: + err = BPDError(ERROR_UNKNOWN, 'No command given') + yield self.send(err.response()) + self.disconnect() # Client sent a blank line. break line = line.decode('utf8') # MPD protocol uses UTF-8. - log.debug(u'{}', line) + for l in line.split(NEWLINE): + self.debug(l, kind='<') + + if self.idle_subscriptions: + # The connection is in idle mode. + if line == 'noidle': + yield bluelet.call(self.send_notifications(True)) + else: + err = BPDError(ERROR_UNKNOWN, + f'Got command while idle: {line}') + yield self.send(err.response()) + break + continue + if line == 'noidle': + # When not in idle, this command sends no response. + continue if clist is not None: # Command list already opened. if line == CLIST_END: yield bluelet.call(self.do_command(clist)) clist = None # Clear the command list. + yield bluelet.call(self.server.dispatch_events()) else: clist.append(Command(line)) @@ -626,18 +876,74 @@ class Connection(object): except BPDClose: # Command indicates that the conn should close. self.sock.close() + self.disconnect() # Client explicitly closed. return - - @classmethod - def handler(cls, server): - def _handle(sock): - """Creates a new `Connection` and runs it. 
- """ - return cls(server, sock).run() - return _handle + except BPDIdle as e: + self.idle_subscriptions = e.subsystems + self.debug('awaiting: {}'.format(' '.join(e.subsystems)), + kind='z') + yield bluelet.call(self.server.dispatch_events()) -class Command(object): +class ControlConnection(Connection): + """A connection used to control BPD for debugging and internal events. + """ + def __init__(self, server, sock): + """Create a new connection for the accepted socket `client`. + """ + super().__init__(server, sock) + + def debug(self, message, kind=' '): + self.server._log.debug('CTRL {}[{}]: {}', kind, self.address, message) + + def run(self): + """Listen for control commands and delegate to `ctrl_*` methods. + """ + self.debug('connected', kind='*') + while True: + line = yield self.sock.readline() + if not line: + break # Client disappeared. + line = line.strip() + if not line: + break # Client sent a blank line. + line = line.decode('utf8') # Protocol uses UTF-8. + for l in line.split(NEWLINE): + self.debug(l, kind='<') + command = Command(line) + try: + func = command.delegate('ctrl_', self) + yield bluelet.call(func(*command.args)) + except (AttributeError, TypeError) as e: + yield self.send('ERROR: {}'.format(e.args[0])) + except Exception: + yield self.send(['ERROR: server error', + traceback.format_exc().rstrip()]) + + def ctrl_play_finished(self): + """Callback from the player signalling a song finished playing. + """ + yield bluelet.call(self.server.dispatch_events()) + + def ctrl_profile(self): + """Memory profiling for debugging. + """ + from guppy import hpy + heap = hpy().heap() + yield self.send(heap) + + def ctrl_nickname(self, oldlabel, newlabel): + """Rename a client in the log messages. + """ + for c in self.server.connections: + if c.address == oldlabel: + c.address = newlabel + break + else: + yield self.send(f'ERROR: no such client: {oldlabel}') + + +class Command: """A command issued by the client for processing by the server. 
""" @@ -657,27 +963,59 @@ class Command(object): if match[0]: # Quoted argument. arg = match[0] - arg = arg.replace(u'\\"', u'"').replace(u'\\\\', u'\\') + arg = arg.replace('\\"', '"').replace('\\\\', '\\') else: # Unquoted argument. arg = match[1] self.args.append(arg) + def delegate(self, prefix, target, extra_args=0): + """Get the target method that corresponds to this command. + The `prefix` is prepended to the command name and then the resulting + name is used to search `target` for a method with a compatible number + of arguments. + """ + # Attempt to get correct command function. + func_name = prefix + self.name + if not hasattr(target, func_name): + raise AttributeError(f'unknown command "{self.name}"') + func = getattr(target, func_name) + + argspec = inspect.getfullargspec(func) + + # Check that `func` is able to handle the number of arguments sent + # by the client (so we can raise ERROR_ARG instead of ERROR_SYSTEM). + # Maximum accepted arguments: argspec includes "self". + max_args = len(argspec.args) - 1 - extra_args + # Minimum accepted arguments: some arguments might be optional. + min_args = max_args + if argspec.defaults: + min_args -= len(argspec.defaults) + wrong_num = (len(self.args) > max_args) or (len(self.args) < min_args) + # If the command accepts a variable number of arguments skip the check. + if wrong_num and not argspec.varargs: + raise TypeError('wrong number of arguments for "{}"' + .format(self.name), self.name) + + return func + def run(self, conn): """A coroutine that executes the command on the given connection. """ - # Attempt to get correct command function. - func_name = 'cmd_' + self.name - if not hasattr(conn.server, func_name): - raise BPDError(ERROR_UNKNOWN, u'unknown command', self.name) - func = getattr(conn.server, func_name) + try: + # `conn` is an extra argument to all cmd handlers. 
+ func = self.delegate('cmd_', conn.server, extra_args=1) + except AttributeError as e: + raise BPDError(ERROR_UNKNOWN, e.args[0]) + except TypeError as e: + raise BPDError(ERROR_ARG, e.args[0], self.name) # Ensure we have permission for this command. if conn.server.password and \ not conn.authenticated and \ self.name not in SAFE_COMMANDS: - raise BPDError(ERROR_PERMISSION, u'insufficient privileges') + raise BPDError(ERROR_PERMISSION, 'insufficient privileges') try: args = [conn] + self.args @@ -697,10 +1035,13 @@ class Command(object): # it on the Connection. raise - except Exception as e: + except BPDIdle: + raise + + except Exception: # An "unintentional" error. Hide it from the client. - log.error('{}', traceback.format_exc(e)) - raise BPDError(ERROR_SYSTEM, u'server error', self.name) + conn.server._log.error('{}', traceback.format_exc()) + raise BPDError(ERROR_SYSTEM, 'server error', self.name) class CommandList(list): @@ -729,7 +1070,7 @@ class CommandList(list): e.index = i # Give the error the correct index. raise e - # Otherwise, possibly send the output delimeter if we're in a + # Otherwise, possibly send the output delimiter if we're in a # verbose ("OK") command list. if self.verbose: yield conn.send(RESP_CLIST_VERBOSE) @@ -743,7 +1084,7 @@ class Server(BaseServer): to store its library. 
""" - def __init__(self, library, host, port, password): + def __init__(self, library, host, port, password, ctrl_port, log): try: from beetsplug.bpd import gstplayer except ImportError as e: @@ -752,65 +1093,80 @@ class Server(BaseServer): raise NoGstreamerError() else: raise - super(Server, self).__init__(host, port, password) + log.info('Starting server...') + super().__init__(host, port, password, ctrl_port, log) self.lib = library self.player = gstplayer.GstPlayer(self.play_finished) self.cmd_update(None) + log.info('Server ready and listening on {}:{}'.format( + host, port)) + log.debug('Listening for control signals on {}:{}'.format( + host, ctrl_port)) def run(self): self.player.run() - super(Server, self).run() + super().run() def play_finished(self): - """A callback invoked every time our player finishes a - track. + """A callback invoked every time our player finishes a track. """ self.cmd_next(None) + self._ctrl_send('play_finished') # Metadata helper functions. def _item_info(self, item): info_lines = [ - u'file: ' + item.destination(fragment=True), - u'Time: ' + six.text_type(int(item.length)), - u'Title: ' + item.title, - u'Artist: ' + item.artist, - u'Album: ' + item.album, - u'Genre: ' + item.genre, + 'file: ' + item.destination(fragment=True), + 'Time: ' + str(int(item.length)), + 'duration: ' + f'{item.length:.3f}', + 'Id: ' + str(item.id), ] - track = six.text_type(item.track) - if item.tracktotal: - track += u'/' + six.text_type(item.tracktotal) - info_lines.append(u'Track: ' + track) - - info_lines.append(u'Date: ' + six.text_type(item.year)) - try: pos = self._id_to_index(item.id) - info_lines.append(u'Pos: ' + six.text_type(pos)) + info_lines.append('Pos: ' + str(pos)) except ArgumentNotFoundError: # Don't include position if not in playlist. 
pass - info_lines.append(u'Id: ' + six.text_type(item.id)) + for tagtype, field in self.tagtype_map.items(): + info_lines.append('{}: {}'.format( + tagtype, str(getattr(item, field)))) return info_lines + def _parse_range(self, items, accept_single_number=False): + """Convert a range of positions to a list of item info. + MPD specifies ranges as START:STOP (endpoint excluded) for some + commands. Sometimes a single number can be provided instead. + """ + try: + start, stop = str(items).split(':', 1) + except ValueError: + if accept_single_number: + return [cast_arg(int, items)] + raise BPDError(ERROR_ARG, 'bad range syntax') + start = cast_arg(int, start) + stop = cast_arg(int, stop) + return range(start, stop) + def _item_id(self, item): return item.id # Database updating. - def cmd_update(self, conn, path=u'/'): + def cmd_update(self, conn, path='/'): """Updates the catalog to reflect the current database state. """ # Path is ignored. Also, the real MPD does this asynchronously; # this is done inline. - print(u'Building directory tree...') + self._log.debug('Building directory tree...') self.tree = vfs.libtree(self.lib) - print(u'... done.') + self._log.debug('Finished building directory tree.') self.updated_time = time.time() + self._send_event('update') + self._send_event('database') # Path (directory tree) browsing. @@ -818,7 +1174,7 @@ class Server(BaseServer): """Returns a VFS node or an item ID located at the path given. 
If the path does not exist, raises a """ - components = path.split(u'/') + components = path.split('/') node = self.tree for component in components: @@ -840,25 +1196,25 @@ class Server(BaseServer): def _path_join(self, p1, p2): """Smashes together two BPD paths.""" - out = p1 + u'/' + p2 - return out.replace(u'//', u'/').replace(u'//', u'/') + out = p1 + '/' + p2 + return out.replace('//', '/').replace('//', '/') - def cmd_lsinfo(self, conn, path=u"/"): + def cmd_lsinfo(self, conn, path="/"): """Sends info on all the items in the path.""" node = self._resolve_path(path) if isinstance(node, int): # Trying to list a track. - raise BPDError(ERROR_ARG, u'this is not a directory') + raise BPDError(ERROR_ARG, 'this is not a directory') else: for name, itemid in iter(sorted(node.files.items())): item = self.lib.get_item(itemid) yield self._item_info(item) for name, _ in iter(sorted(node.dirs.items())): dirpath = self._path_join(path, name) - if dirpath.startswith(u"/"): + if dirpath.startswith("/"): # Strip leading slash (libmpc rejects this). dirpath = dirpath[1:] - yield u'directory: %s' % dirpath + yield 'directory: %s' % dirpath def _listall(self, basepath, node, info=False): """Helper function for recursive listing. If info, show @@ -870,25 +1226,23 @@ class Server(BaseServer): item = self.lib.get_item(node) yield self._item_info(item) else: - yield u'file: ' + basepath + yield 'file: ' + basepath else: # List a directory. Recurse into both directories and files. 
for name, itemid in sorted(node.files.items()): newpath = self._path_join(basepath, name) # "yield from" - for v in self._listall(newpath, itemid, info): - yield v + yield from self._listall(newpath, itemid, info) for name, subdir in sorted(node.dirs.items()): newpath = self._path_join(basepath, name) - yield u'directory: ' + newpath - for v in self._listall(newpath, subdir, info): - yield v + yield 'directory: ' + newpath + yield from self._listall(newpath, subdir, info) - def cmd_listall(self, conn, path=u"/"): + def cmd_listall(self, conn, path="/"): """Send the paths all items in the directory, recursively.""" return self._listall(path, self._resolve_path(path), False) - def cmd_listallinfo(self, conn, path=u"/"): + def cmd_listallinfo(self, conn, path="/"): """Send info on all the items in the directory, recursively.""" return self._listall(path, self._resolve_path(path), True) @@ -905,11 +1259,9 @@ class Server(BaseServer): # Recurse into a directory. for name, itemid in sorted(node.files.items()): # "yield from" - for v in self._all_items(itemid): - yield v + yield from self._all_items(itemid) for name, subdir in sorted(node.dirs.items()): - for v in self._all_items(subdir): - yield v + yield from self._all_items(subdir) def _add(self, path, send_id=False): """Adds a track or directory to the playlist, specified by the @@ -918,8 +1270,9 @@ class Server(BaseServer): for item in self._all_items(self._resolve_path(path)): self.playlist.append(item) if send_id: - yield u'Id: ' + six.text_type(item.id) + yield 'Id: ' + str(item.id) self.playlist_version += 1 + self._send_event('playlist') def cmd_add(self, conn, path): """Adds a track or directory to the playlist, specified by a @@ -934,16 +1287,28 @@ class Server(BaseServer): # Server info. 
def cmd_status(self, conn): - for line in super(Server, self).cmd_status(conn): - yield line + yield from super().cmd_status(conn) if self.current_index > -1: item = self.playlist[self.current_index] - yield u'bitrate: ' + six.text_type(item.bitrate / 1000) - # Missing 'audio'. + yield ( + 'bitrate: ' + str(item.bitrate / 1000), + 'audio: {}:{}:{}'.format( + str(item.samplerate), + str(item.bitdepth), + str(item.channels), + ), + ) (pos, total) = self.player.time() - yield u'time: ' + six.text_type(pos) + u':' + six.text_type(total) + yield ( + 'time: {}:{}'.format( + str(int(pos)), + str(int(total)), + ), + 'elapsed: ' + f'{pos:.3f}', + 'duration: ' + f'{total:.3f}', + ) # Also missing 'updating_db'. @@ -958,31 +1323,47 @@ class Server(BaseServer): artists, albums, songs, totaltime = tx.query(statement)[0] yield ( - u'artists: ' + six.text_type(artists), - u'albums: ' + six.text_type(albums), - u'songs: ' + six.text_type(songs), - u'uptime: ' + six.text_type(int(time.time() - self.startup_time)), - u'playtime: ' + u'0', # Missing. - u'db_playtime: ' + six.text_type(int(totaltime)), - u'db_update: ' + six.text_type(int(self.updated_time)), + 'artists: ' + str(artists), + 'albums: ' + str(albums), + 'songs: ' + str(songs), + 'uptime: ' + str(int(time.time() - self.startup_time)), + 'playtime: ' + '0', # Missing. + 'db_playtime: ' + str(int(totaltime)), + 'db_update: ' + str(int(self.updated_time)), ) + def cmd_decoders(self, conn): + """Send list of supported decoders and formats.""" + decoders = self.player.get_decoders() + for name, (mimes, exts) in decoders.items(): + yield f'plugin: {name}' + for ext in exts: + yield f'suffix: {ext}' + for mime in mimes: + yield f'mime_type: {mime}' + # Searching. tagtype_map = { - u'Artist': u'artist', - u'Album': u'album', - u'Title': u'title', - u'Track': u'track', - u'AlbumArtist': u'albumartist', - u'AlbumArtistSort': u'albumartist_sort', - # Name? 
- u'Genre': u'genre', - u'Date': u'year', - u'Composer': u'composer', - # Performer? - u'Disc': u'disc', - u'filename': u'path', # Suspect. + 'Artist': 'artist', + 'ArtistSort': 'artist_sort', + 'Album': 'album', + 'Title': 'title', + 'Track': 'track', + 'AlbumArtist': 'albumartist', + 'AlbumArtistSort': 'albumartist_sort', + 'Label': 'label', + 'Genre': 'genre', + 'Date': 'year', + 'OriginalDate': 'original_year', + 'Composer': 'composer', + 'Disc': 'disc', + 'Comment': 'comments', + 'MUSICBRAINZ_TRACKID': 'mb_trackid', + 'MUSICBRAINZ_ALBUMID': 'mb_albumid', + 'MUSICBRAINZ_ARTISTID': 'mb_artistid', + 'MUSICBRAINZ_ALBUMARTISTID': 'mb_albumartistid', + 'MUSICBRAINZ_RELEASETRACKID': 'mb_releasetrackid', } def cmd_tagtypes(self, conn): @@ -990,7 +1371,7 @@ class Server(BaseServer): searching. """ for tag in self.tagtype_map: - yield u'tagtype: ' + tag + yield 'tagtype: ' + tag def _tagtype_lookup(self, tag): """Uses `tagtype_map` to look up the beets column name for an @@ -1002,7 +1383,7 @@ class Server(BaseServer): # Match case-insensitively. if test_tag.lower() == tag.lower(): return test_tag, key - raise BPDError(ERROR_UNKNOWN, u'no such tagtype') + raise BPDError(ERROR_UNKNOWN, 'no such tagtype') def _metadata_query(self, query_type, any_query_type, kv): """Helper function returns a query object that will find items @@ -1015,13 +1396,13 @@ class Server(BaseServer): # Iterate pairwise over the arguments. it = iter(kv) for tag, value in zip(it, it): - if tag.lower() == u'any': + if tag.lower() == 'any': if any_query_type: queries.append(any_query_type(value, ITEM_KEYS_WRITABLE, query_type)) else: - raise BPDError(ERROR_UNKNOWN, u'no such tagtype') + raise BPDError(ERROR_UNKNOWN, 'no such tagtype') else: _, key = self._tagtype_lookup(tag) queries.append(query_type(key, value)) @@ -1050,17 +1431,32 @@ class Server(BaseServer): filtered by matching match_tag to match_term. 
""" show_tag_canon, show_key = self._tagtype_lookup(show_tag) + if len(kv) == 1: + if show_tag_canon == 'Album': + # If no tag was given, assume artist. This is because MPD + # supports a short version of this command for fetching the + # albums belonging to a particular artist, and some clients + # rely on this behaviour (e.g. MPDroid, M.A.L.P.). + kv = ('Artist', kv[0]) + else: + raise BPDError(ERROR_ARG, 'should be "Album" for 3 arguments') + elif len(kv) % 2 != 0: + raise BPDError(ERROR_ARG, 'Incorrect number of filter arguments') query = self._metadata_query(dbcore.query.MatchQuery, None, kv) clause, subvals = query.clause() statement = 'SELECT DISTINCT ' + show_key + \ ' FROM items WHERE ' + clause + \ ' ORDER BY ' + show_key + self._log.debug(statement) with self.lib.transaction() as tx: rows = tx.query(statement, subvals) for row in rows: - yield show_tag_canon + u': ' + six.text_type(row[0]) + if not row[0]: + # Skip any empty values of the field. + continue + yield show_tag_canon + ': ' + str(row[0]) def cmd_count(self, conn, tag, value): """Returns the number and total time of songs matching the @@ -1072,8 +1468,44 @@ class Server(BaseServer): for item in self.lib.items(dbcore.query.MatchQuery(key, value)): songs += 1 playtime += item.length - yield u'songs: ' + six.text_type(songs) - yield u'playtime: ' + six.text_type(int(playtime)) + yield 'songs: ' + str(songs) + yield 'playtime: ' + str(int(playtime)) + + # Persistent playlist manipulation. In MPD this is an optional feature so + # these dummy implementations match MPD's behaviour with the feature off. 
+ + def cmd_listplaylist(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'No such playlist') + + def cmd_listplaylistinfo(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'No such playlist') + + def cmd_listplaylists(self, conn): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_load(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'Stored playlists are disabled') + + def cmd_playlistadd(self, conn, playlist, uri): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistclear(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistdelete(self, conn, playlist, index): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistmove(self, conn, playlist, from_index, to_index): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_rename(self, conn, playlist, new_name): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_rm(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_save(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') # "Outputs." Just a dummy implementation because we don't control # any outputs. 
@@ -1081,9 +1513,9 @@ class Server(BaseServer): def cmd_outputs(self, conn): """List the available outputs.""" yield ( - u'outputid: 0', - u'outputname: gstreamer', - u'outputenabled: 1', + 'outputid: 0', + 'outputname: gstreamer', + 'outputenabled: 1', ) def cmd_enableoutput(self, conn, output_id): @@ -1094,7 +1526,7 @@ class Server(BaseServer): def cmd_disableoutput(self, conn, output_id): output_id = cast_arg(int, output_id) if output_id == 0: - raise BPDError(ERROR_ARG, u'cannot disable this output') + raise BPDError(ERROR_ARG, 'cannot disable this output') else: raise ArgumentIndexError() @@ -1105,7 +1537,7 @@ class Server(BaseServer): def cmd_play(self, conn, index=-1): new_index = index != -1 and index != self.current_index was_paused = self.paused - super(Server, self).cmd_play(conn, index) + super().cmd_play(conn, index) if self.current_index > -1: # Not stopped. if was_paused and not new_index: @@ -1115,28 +1547,28 @@ class Server(BaseServer): self.player.play_file(self.playlist[self.current_index].path) def cmd_pause(self, conn, state=None): - super(Server, self).cmd_pause(conn, state) + super().cmd_pause(conn, state) if self.paused: self.player.pause() elif self.player.playing: self.player.play() def cmd_stop(self, conn): - super(Server, self).cmd_stop(conn) + super().cmd_stop(conn) self.player.stop() def cmd_seek(self, conn, index, pos): """Seeks to the specified position in the specified song.""" index = cast_arg(int, index) - pos = cast_arg(int, pos) - super(Server, self).cmd_seek(conn, index, pos) + pos = cast_arg(float, pos) + super().cmd_seek(conn, index, pos) self.player.seek(pos) # Volume control. def cmd_setvol(self, conn, vol): vol = cast_arg(int, vol) - super(Server, self).cmd_setvol(conn, vol) + super().cmd_setvol(conn, vol) self.player.volume = float(vol) / 100 @@ -1147,37 +1579,30 @@ class BPDPlugin(BeetsPlugin): server. 
""" def __init__(self): - super(BPDPlugin, self).__init__() + super().__init__() self.config.add({ - 'host': u'', + 'host': '', 'port': 6600, - 'password': u'', + 'control_port': 6601, + 'password': '', 'volume': VOLUME_MAX, }) self.config['password'].redact = True - def start_bpd(self, lib, host, port, password, volume, debug): + def start_bpd(self, lib, host, port, password, volume, ctrl_port): """Starts a BPD server.""" - if debug: # FIXME this should be managed by BeetsPlugin - self._log.setLevel(logging.DEBUG) - else: - self._log.setLevel(logging.WARNING) try: - server = Server(lib, host, port, password) + server = Server(lib, host, port, password, ctrl_port, self._log) server.cmd_setvol(None, volume) server.run() except NoGstreamerError: - global_log.error(u'Gstreamer Python bindings not found.') - global_log.error(u'Install "gstreamer1.0" and "python-gi"' - u'or similar package to use BPD.') + self._log.error('Gstreamer Python bindings not found.') + self._log.error('Install "gstreamer1.0" and "python-gi"' + 'or similar package to use BPD.') def commands(self): cmd = beets.ui.Subcommand( - 'bpd', help=u'run an MPD-compatible music player server' - ) - cmd.parser.add_option( - '-d', '--debug', action='store_true', - help=u'dump all MPD traffic to stdout' + 'bpd', help='run an MPD-compatible music player server' ) def func(lib, opts, args): @@ -1185,11 +1610,15 @@ class BPDPlugin(BeetsPlugin): host = args.pop(0) if args else host port = args.pop(0) if args else self.config['port'].get(int) if args: - raise beets.ui.UserError(u'too many arguments') + ctrl_port = args.pop(0) + else: + ctrl_port = self.config['control_port'].get(int) + if args: + raise beets.ui.UserError('too many arguments') password = self.config['password'].as_str() volume = self.config['volume'].get(int) - debug = opts.debug or False - self.start_bpd(lib, host, int(port), password, volume, debug) + self.start_bpd(lib, host, int(port), password, volume, + int(ctrl_port)) cmd.func = func return 
[cmd] diff --git a/libs/common/beetsplug/bpd/gstplayer.py b/libs/common/beetsplug/bpd/gstplayer.py index 705692aa..64954b1c 100644 --- a/libs/common/beetsplug/bpd/gstplayer.py +++ b/libs/common/beetsplug/bpd/gstplayer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,15 +16,13 @@ music player. """ -from __future__ import division, absolute_import, print_function -import six import sys import time -from six.moves import _thread +import _thread import os import copy -from six.moves import urllib +import urllib from beets import ui import gi @@ -40,7 +37,7 @@ class QueryError(Exception): pass -class GstPlayer(object): +class GstPlayer: """A music player abstracting GStreamer's Playbin element. Create a player object, then call run() to start a thread with a @@ -64,7 +61,8 @@ class GstPlayer(object): """ # Set up the Gstreamer player. From the pygst tutorial: - # http://pygstdocs.berlios.de/pygst-tutorial/playbin.html + # https://pygstdocs.berlios.de/pygst-tutorial/playbin.html (gone) + # https://brettviren.github.io/pygst-tutorial-org/pygst-tutorial.html #### # Updated to GStreamer 1.0 with: # https://wiki.ubuntu.com/Novacut/GStreamer1.0 @@ -109,7 +107,7 @@ class GstPlayer(object): # error self.player.set_state(Gst.State.NULL) err, debug = message.parse_error() - print(u"Error: {0}".format(err)) + print(f"Error: {err}") self.playing = False def _set_volume(self, volume): @@ -129,7 +127,7 @@ class GstPlayer(object): path. 
""" self.player.set_state(Gst.State.NULL) - if isinstance(path, six.text_type): + if isinstance(path, str): path = path.encode('utf-8') uri = 'file://' + urllib.parse.quote(path) self.player.set_property("uri", uri) @@ -177,12 +175,12 @@ class GstPlayer(object): posq = self.player.query_position(fmt) if not posq[0]: raise QueryError("query_position failed") - pos = posq[1] // (10 ** 9) + pos = posq[1] / (10 ** 9) lengthq = self.player.query_duration(fmt) if not lengthq[0]: raise QueryError("query_duration failed") - length = lengthq[1] // (10 ** 9) + length = lengthq[1] / (10 ** 9) self.cached_time = (pos, length) return (pos, length) @@ -215,6 +213,59 @@ class GstPlayer(object): while self.playing: time.sleep(1) + def get_decoders(self): + return get_decoders() + + +def get_decoders(): + """Get supported audio decoders from GStreamer. + Returns a dict mapping decoder element names to the associated media types + and file extensions. + """ + # We only care about audio decoder elements. + filt = (Gst.ELEMENT_FACTORY_TYPE_DEPAYLOADER | + Gst.ELEMENT_FACTORY_TYPE_DEMUXER | + Gst.ELEMENT_FACTORY_TYPE_PARSER | + Gst.ELEMENT_FACTORY_TYPE_DECODER | + Gst.ELEMENT_FACTORY_TYPE_MEDIA_AUDIO) + + decoders = {} + mime_types = set() + for f in Gst.ElementFactory.list_get_elements(filt, Gst.Rank.NONE): + for pad in f.get_static_pad_templates(): + if pad.direction == Gst.PadDirection.SINK: + caps = pad.static_caps.get() + mimes = set() + for i in range(caps.get_size()): + struct = caps.get_structure(i) + mime = struct.get_name() + if mime == 'unknown/unknown': + continue + mimes.add(mime) + mime_types.add(mime) + if mimes: + decoders[f.get_name()] = (mimes, set()) + + # Check all the TypeFindFactory plugin features form the registry. If they + # are associated with an audio media type that we found above, get the list + # of corresponding file extensions. 
+ mime_extensions = {mime: set() for mime in mime_types} + for feat in Gst.Registry.get().get_feature_list(Gst.TypeFindFactory): + caps = feat.get_caps() + if caps: + for i in range(caps.get_size()): + struct = caps.get_structure(i) + mime = struct.get_name() + if mime in mime_types: + mime_extensions[mime].update(feat.get_extensions()) + + # Fill in the slot we left for file extensions. + for name, (mimes, exts) in decoders.items(): + for mime in mimes: + exts.update(mime_extensions[mime]) + + return decoders + def play_simple(paths): """Play the files in paths in a straightforward way, without diff --git a/libs/common/beetsplug/bpm.py b/libs/common/beetsplug/bpm.py index 20218bd3..5aa2d95a 100644 --- a/libs/common/beetsplug/bpm.py +++ b/libs/common/beetsplug/bpm.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, aroquen # @@ -15,10 +14,8 @@ """Determine BPM by pressing a key to the rhythm.""" -from __future__ import division, absolute_import, print_function import time -from six.moves import input from beets import ui from beets.plugins import BeetsPlugin @@ -51,16 +48,16 @@ def bpm(max_strokes): class BPMPlugin(BeetsPlugin): def __init__(self): - super(BPMPlugin, self).__init__() + super().__init__() self.config.add({ - u'max_strokes': 3, - u'overwrite': True, + 'max_strokes': 3, + 'overwrite': True, }) def commands(self): cmd = ui.Subcommand('bpm', - help=u'determine bpm of a song by pressing ' - u'a key to the rhythm') + help='determine bpm of a song by pressing ' + 'a key to the rhythm') cmd.func = self.command return [cmd] @@ -72,19 +69,19 @@ class BPMPlugin(BeetsPlugin): def get_bpm(self, items, write=False): overwrite = self.config['overwrite'].get(bool) if len(items) > 1: - raise ValueError(u'Can only get bpm of one song at time') + raise ValueError('Can only get bpm of one song at time') item = items[0] if item['bpm']: - self._log.info(u'Found bpm {0}', item['bpm']) + self._log.info('Found bpm {0}', item['bpm']) 
if not overwrite: return - self._log.info(u'Press Enter {0} times to the rhythm or Ctrl-D ' - u'to exit', self.config['max_strokes'].get(int)) + self._log.info('Press Enter {0} times to the rhythm or Ctrl-D ' + 'to exit', self.config['max_strokes'].get(int)) new_bpm = bpm(self.config['max_strokes'].get(int)) item['bpm'] = int(new_bpm) if write: item.try_write() item.store() - self._log.info(u'Added new bpm {0}', item['bpm']) + self._log.info('Added new bpm {0}', item['bpm']) diff --git a/libs/common/beetsplug/bpsync.py b/libs/common/beetsplug/bpsync.py new file mode 100644 index 00000000..5b28d6d2 --- /dev/null +++ b/libs/common/beetsplug/bpsync.py @@ -0,0 +1,186 @@ +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Update library's tags using Beatport. 
+""" + +from beets.plugins import BeetsPlugin, apply_item_changes +from beets import autotag, library, ui, util + +from .beatport import BeatportPlugin + + +class BPSyncPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.beatport_plugin = BeatportPlugin() + self.beatport_plugin.setup() + + def commands(self): + cmd = ui.Subcommand('bpsync', help='update metadata from Beatport') + cmd.parser.add_option( + '-p', + '--pretend', + action='store_true', + help='show all changes but do nothing', + ) + cmd.parser.add_option( + '-m', + '--move', + action='store_true', + dest='move', + help="move files in the library directory", + ) + cmd.parser.add_option( + '-M', + '--nomove', + action='store_false', + dest='move', + help="don't move files in library", + ) + cmd.parser.add_option( + '-W', + '--nowrite', + action='store_false', + default=None, + dest='write', + help="don't write updated metadata to files", + ) + cmd.parser.add_format_option() + cmd.func = self.func + return [cmd] + + def func(self, lib, opts, args): + """Command handler for the bpsync function. + """ + move = ui.should_move(opts.move) + pretend = opts.pretend + write = ui.should_write(opts.write) + query = ui.decargs(args) + + self.singletons(lib, query, move, pretend, write) + self.albums(lib, query, move, pretend, write) + + def singletons(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for items matched by + query. + """ + for item in lib.items(query + ['singleton:true']): + if not item.mb_trackid: + self._log.info( + 'Skipping singleton with no mb_trackid: {}', item + ) + continue + + if not self.is_beatport_track(item): + self._log.info( + 'Skipping non-{} singleton: {}', + self.beatport_plugin.data_source, + item, + ) + continue + + # Apply. 
+ trackinfo = self.beatport_plugin.track_for_id(item.mb_trackid) + with lib.transaction(): + autotag.apply_item_metadata(item, trackinfo) + apply_item_changes(lib, item, move, pretend, write) + + @staticmethod + def is_beatport_track(item): + return ( + item.get('data_source') == BeatportPlugin.data_source + and item.mb_trackid.isnumeric() + ) + + def get_album_tracks(self, album): + if not album.mb_albumid: + self._log.info('Skipping album with no mb_albumid: {}', album) + return False + if not album.mb_albumid.isnumeric(): + self._log.info( + 'Skipping album with invalid {} ID: {}', + self.beatport_plugin.data_source, + album, + ) + return False + items = list(album.items()) + if album.get('data_source') == self.beatport_plugin.data_source: + return items + if not all(self.is_beatport_track(item) for item in items): + self._log.info( + 'Skipping non-{} release: {}', + self.beatport_plugin.data_source, + album, + ) + return False + return items + + def albums(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for albums matched by + query and their items. + """ + # Process matching albums. + for album in lib.albums(query): + # Do we have a valid Beatport album? + items = self.get_album_tracks(album) + if not items: + continue + + # Get the Beatport album information. 
+ albuminfo = self.beatport_plugin.album_for_id(album.mb_albumid) + if not albuminfo: + self._log.info( + 'Release ID {} not found for album {}', + album.mb_albumid, + album, + ) + continue + + beatport_trackid_to_trackinfo = { + track.track_id: track for track in albuminfo.tracks + } + library_trackid_to_item = { + int(item.mb_trackid): item for item in items + } + item_to_trackinfo = { + item: beatport_trackid_to_trackinfo[track_id] + for track_id, item in library_trackid_to_item.items() + } + + self._log.info('applying changes to {}', album) + with lib.transaction(): + autotag.apply_metadata(albuminfo, item_to_trackinfo) + changed = False + # Find any changed item to apply Beatport changes to album. + any_changed_item = items[0] + for item in items: + item_changed = ui.show_model_changes(item) + changed |= item_changed + if item_changed: + any_changed_item = item + apply_item_changes(lib, item, move, pretend, write) + + if pretend or not changed: + continue + + # Update album structure to reflect an item in it. + for key in library.Album.item_keys: + album[key] = any_changed_item[key] + album.store() + + # Move album art (and any inconsistent items). + if move and lib.directory in util.ancestry(items[0].path): + self._log.debug('moving album {}', album) + album.move() diff --git a/libs/common/beetsplug/bucket.py b/libs/common/beetsplug/bucket.py index c4be2a3d..9ed50b45 100644 --- a/libs/common/beetsplug/bucket.py +++ b/libs/common/beetsplug/bucket.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Fabrice Laporte. # @@ -16,12 +15,10 @@ """Provides the %bucket{} function for path formatting. 
""" -from __future__ import division, absolute_import, print_function from datetime import datetime import re import string -from six.moves import zip from itertools import tee from beets import plugins, ui @@ -49,7 +46,7 @@ def span_from_str(span_str): """Convert string to a 4 digits year """ if yearfrom < 100: - raise BucketError(u"%d must be expressed on 4 digits" % yearfrom) + raise BucketError("%d must be expressed on 4 digits" % yearfrom) # if two digits only, pick closest year that ends by these two # digits starting from yearfrom @@ -60,14 +57,14 @@ def span_from_str(span_str): d = (yearfrom - yearfrom % 100) + d return d - years = [int(x) for x in re.findall('\d+', span_str)] + years = [int(x) for x in re.findall(r'\d+', span_str)] if not years: - raise ui.UserError(u"invalid range defined for year bucket '%s': no " - u"year found" % span_str) + raise ui.UserError("invalid range defined for year bucket '%s': no " + "year found" % span_str) try: years = [normalize_year(x, years[0]) for x in years] except BucketError as exc: - raise ui.UserError(u"invalid range defined for year bucket '%s': %s" % + raise ui.UserError("invalid range defined for year bucket '%s': %s" % (span_str, exc)) res = {'from': years[0], 'str': span_str} @@ -128,10 +125,10 @@ def str2fmt(s): res = {'fromnchars': len(m.group('fromyear')), 'tonchars': len(m.group('toyear'))} - res['fmt'] = "%s%%s%s%s%s" % (m.group('bef'), - m.group('sep'), - '%s' if res['tonchars'] else '', - m.group('after')) + res['fmt'] = "{}%s{}{}{}".format(m.group('bef'), + m.group('sep'), + '%s' if res['tonchars'] else '', + m.group('after')) return res @@ -170,8 +167,8 @@ def build_alpha_spans(alpha_spans_str, alpha_regexs): begin_index = ASCII_DIGITS.index(bucket[0]) end_index = ASCII_DIGITS.index(bucket[-1]) else: - raise ui.UserError(u"invalid range defined for alpha bucket " - u"'%s': no alphanumeric character found" % + raise ui.UserError("invalid range defined for alpha bucket " + "'%s': no alphanumeric 
character found" % elem) spans.append( re.compile( @@ -184,7 +181,7 @@ def build_alpha_spans(alpha_spans_str, alpha_regexs): class BucketPlugin(plugins.BeetsPlugin): def __init__(self): - super(BucketPlugin, self).__init__() + super().__init__() self.template_funcs['bucket'] = self._tmpl_bucket self.config.add({ diff --git a/libs/common/beetsplug/chroma.py b/libs/common/beetsplug/chroma.py index 57472956..353923aa 100644 --- a/libs/common/beetsplug/chroma.py +++ b/libs/common/beetsplug/chroma.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,16 +15,17 @@ """Adds Chromaprint/Acoustid acoustic fingerprinting support to the autotagger. Requires the pyacoustid library. """ -from __future__ import division, absolute_import, print_function from beets import plugins from beets import ui from beets import util from beets import config -from beets.util import confit from beets.autotag import hooks +import confuse import acoustid from collections import defaultdict +from functools import partial +import re API_KEY = '1vOwZtEn' SCORE_THRESH = 0.5 @@ -57,6 +57,30 @@ def prefix(it, count): yield v +def releases_key(release, countries, original_year): + """Used as a key to sort releases by date then preferred country + """ + date = release.get('date') + if date and original_year: + year = date.get('year', 9999) + month = date.get('month', 99) + day = date.get('day', 99) + else: + year = 9999 + month = 99 + day = 99 + + # Uses index of preferred countries to sort + country_key = 99 + if release.get('country'): + for i, country in enumerate(countries): + if country.match(release['country']): + country_key = i + break + + return (year, month, day, country_key) + + def acoustid_match(log, path): """Gets metadata for a file from Acoustid and populates the _matches, _fingerprints, and _acoustids dictionaries accordingly. 
@@ -64,42 +88,55 @@ def acoustid_match(log, path): try: duration, fp = acoustid.fingerprint_file(util.syspath(path)) except acoustid.FingerprintGenerationError as exc: - log.error(u'fingerprinting of {0} failed: {1}', + log.error('fingerprinting of {0} failed: {1}', util.displayable_path(repr(path)), exc) return None + fp = fp.decode() _fingerprints[path] = fp try: res = acoustid.lookup(API_KEY, fp, duration, meta='recordings releases') except acoustid.AcoustidError as exc: - log.debug(u'fingerprint matching {0} failed: {1}', + log.debug('fingerprint matching {0} failed: {1}', util.displayable_path(repr(path)), exc) return None - log.debug(u'chroma: fingerprinted {0}', + log.debug('chroma: fingerprinted {0}', util.displayable_path(repr(path))) # Ensure the response is usable and parse it. if res['status'] != 'ok' or not res.get('results'): - log.debug(u'no match found') + log.debug('no match found') return None result = res['results'][0] # Best match. if result['score'] < SCORE_THRESH: - log.debug(u'no results above threshold') + log.debug('no results above threshold') return None _acoustids[path] = result['id'] - # Get recording and releases from the result. + # Get recording and releases from the result if not result.get('recordings'): - log.debug(u'no recordings found') + log.debug('no recordings found') return None recording_ids = [] - release_ids = [] + releases = [] for recording in result['recordings']: recording_ids.append(recording['id']) if 'releases' in recording: - release_ids += [rel['id'] for rel in recording['releases']] + releases.extend(recording['releases']) - log.debug(u'matched recordings {0} on releases {1}', + # The releases list is essentially in random order from the Acoustid lookup + # so we optionally sort it using the match.preferred configuration options. + # 'original_year' to sort the earliest first and + # 'countries' to then sort preferred countries first. 
+ country_patterns = config['match']['preferred']['countries'].as_str_seq() + countries = [re.compile(pat, re.I) for pat in country_patterns] + original_year = config['match']['preferred']['original_year'] + releases.sort(key=partial(releases_key, + countries=countries, + original_year=original_year)) + release_ids = [rel['id'] for rel in releases] + + log.debug('matched recordings {0} on releases {1}', recording_ids, release_ids) _matches[path] = recording_ids, release_ids @@ -128,7 +165,7 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def __init__(self): - super(AcoustidPlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, @@ -152,14 +189,14 @@ class AcoustidPlugin(plugins.BeetsPlugin): dist.add_expr('track_id', info.track_id not in recording_ids) return dist - def candidates(self, items, artist, album, va_likely): + def candidates(self, items, artist, album, va_likely, extra_tags=None): albums = [] for relid in prefix(_all_releases(items), MAX_RELEASES): album = hooks.album_for_mbid(relid) if album: albums.append(album) - self._log.debug(u'acoustid album candidates: {0}', len(albums)) + self._log.debug('acoustid album candidates: {0}', len(albums)) return albums def item_candidates(self, item, artist, title): @@ -172,24 +209,24 @@ class AcoustidPlugin(plugins.BeetsPlugin): track = hooks.track_for_mbid(recording_id) if track: tracks.append(track) - self._log.debug(u'acoustid item candidates: {0}', len(tracks)) + self._log.debug('acoustid item candidates: {0}', len(tracks)) return tracks def commands(self): submit_cmd = ui.Subcommand('submit', - help=u'submit Acoustid fingerprints') + help='submit Acoustid fingerprints') def submit_cmd_func(lib, opts, args): try: apikey = config['acoustid']['apikey'].as_str() - except confit.NotFoundError: - raise ui.UserError(u'no Acoustid user API key provided') + except confuse.NotFoundError: + raise ui.UserError('no Acoustid user API key provided') submit_items(self._log, 
apikey, lib.items(ui.decargs(args))) submit_cmd.func = submit_cmd_func fingerprint_cmd = ui.Subcommand( 'fingerprint', - help=u'generate fingerprints for items without them' + help='generate fingerprints for items without them' ) def fingerprint_cmd_func(lib, opts, args): @@ -232,15 +269,15 @@ def submit_items(log, userkey, items, chunksize=64): def submit_chunk(): """Submit the current accumulated fingerprint data.""" - log.info(u'submitting {0} fingerprints', len(data)) + log.info('submitting {0} fingerprints', len(data)) try: acoustid.submit(API_KEY, userkey, data) except acoustid.AcoustidError as exc: - log.warning(u'acoustid submission error: {0}', exc) + log.warning('acoustid submission error: {0}', exc) del data[:] for item in items: - fp = fingerprint_item(log, item) + fp = fingerprint_item(log, item, write=ui.should_write()) # Construct a submission dictionary for this item. item_data = { @@ -249,7 +286,7 @@ def submit_items(log, userkey, items, chunksize=64): } if item.mb_trackid: item_data['mbid'] = item.mb_trackid - log.debug(u'submitting MBID') + log.debug('submitting MBID') else: item_data.update({ 'track': item.title, @@ -260,7 +297,7 @@ def submit_items(log, userkey, items, chunksize=64): 'trackno': item.track, 'discno': item.disc, }) - log.debug(u'submitting textual metadata') + log.debug('submitting textual metadata') data.append(item_data) # If we have enough data, submit a chunk. @@ -281,28 +318,28 @@ def fingerprint_item(log, item, write=False): """ # Get a fingerprint and length for this track. 
if not item.length: - log.info(u'{0}: no duration available', + log.info('{0}: no duration available', util.displayable_path(item.path)) elif item.acoustid_fingerprint: if write: - log.info(u'{0}: fingerprint exists, skipping', + log.info('{0}: fingerprint exists, skipping', util.displayable_path(item.path)) else: - log.info(u'{0}: using existing fingerprint', + log.info('{0}: using existing fingerprint', util.displayable_path(item.path)) - return item.acoustid_fingerprint + return item.acoustid_fingerprint else: - log.info(u'{0}: fingerprinting', + log.info('{0}: fingerprinting', util.displayable_path(item.path)) try: _, fp = acoustid.fingerprint_file(util.syspath(item.path)) - item.acoustid_fingerprint = fp + item.acoustid_fingerprint = fp.decode() if write: - log.info(u'{0}: writing fingerprint', + log.info('{0}: writing fingerprint', util.displayable_path(item.path)) item.try_write() if item._db: item.store() return item.acoustid_fingerprint except acoustid.FingerprintGenerationError as exc: - log.info(u'fingerprint generation failed: {0}', exc) + log.info('fingerprint generation failed: {0}', exc) diff --git a/libs/common/beetsplug/convert.py b/libs/common/beetsplug/convert.py index d1223596..6bc07c28 100644 --- a/libs/common/beetsplug/convert.py +++ b/libs/common/beetsplug/convert.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Jakob Schnitzer. 
# @@ -15,20 +14,18 @@ """Converts tracks or albums to external directory """ -from __future__ import division, absolute_import, print_function +from beets.util import par_map, decode_commandline_path, arg_encoding import os import threading import subprocess import tempfile import shlex -import six from string import Template -import platform from beets import ui, util, plugins, config from beets.plugins import BeetsPlugin -from beets.util.confit import ConfigTypeError +from confuse import ConfigTypeError from beets import art from beets.util.artresizer import ArtResizer from beets.library import parse_query_string @@ -39,8 +36,8 @@ _temp_files = [] # Keep track of temporary transcoded files for deletion. # Some convenient alternate names for formats. ALIASES = { - u'wma': u'windows media', - u'vorbis': u'ogg', + 'wma': 'windows media', + 'vorbis': 'ogg', } LOSSLESS_FORMATS = ['ape', 'flac', 'alac', 'wav', 'aiff'] @@ -68,7 +65,7 @@ def get_format(fmt=None): extension = format_info.get('extension', fmt) except KeyError: raise ui.UserError( - u'convert: format {0} needs the "command" field' + 'convert: format {} needs the "command" field' .format(fmt) ) except ConfigTypeError: @@ -81,7 +78,7 @@ def get_format(fmt=None): command = config['convert']['command'].as_str() elif 'opts' in keys: # Undocumented option for backwards compatibility with < 1.3.1. 
- command = u'ffmpeg -i $source -y {0} $dest'.format( + command = 'ffmpeg -i $source -y {} $dest'.format( config['convert']['opts'].as_str() ) if 'extension' in keys: @@ -110,70 +107,81 @@ def should_transcode(item, fmt): class ConvertPlugin(BeetsPlugin): def __init__(self): - super(ConvertPlugin, self).__init__() + super().__init__() self.config.add({ - u'dest': None, - u'pretend': False, - u'threads': util.cpu_count(), - u'format': u'mp3', - u'formats': { - u'aac': { - u'command': u'ffmpeg -i $source -y -vn -acodec aac ' - u'-aq 1 $dest', - u'extension': u'm4a', + 'dest': None, + 'pretend': False, + 'link': False, + 'hardlink': False, + 'threads': util.cpu_count(), + 'format': 'mp3', + 'id3v23': 'inherit', + 'formats': { + 'aac': { + 'command': 'ffmpeg -i $source -y -vn -acodec aac ' + '-aq 1 $dest', + 'extension': 'm4a', }, - u'alac': { - u'command': u'ffmpeg -i $source -y -vn -acodec alac $dest', - u'extension': u'm4a', + 'alac': { + 'command': 'ffmpeg -i $source -y -vn -acodec alac $dest', + 'extension': 'm4a', }, - u'flac': u'ffmpeg -i $source -y -vn -acodec flac $dest', - u'mp3': u'ffmpeg -i $source -y -vn -aq 2 $dest', - u'opus': - u'ffmpeg -i $source -y -vn -acodec libopus -ab 96k $dest', - u'ogg': - u'ffmpeg -i $source -y -vn -acodec libvorbis -aq 3 $dest', - u'wma': - u'ffmpeg -i $source -y -vn -acodec wmav2 -vn $dest', + 'flac': 'ffmpeg -i $source -y -vn -acodec flac $dest', + 'mp3': 'ffmpeg -i $source -y -vn -aq 2 $dest', + 'opus': + 'ffmpeg -i $source -y -vn -acodec libopus -ab 96k $dest', + 'ogg': + 'ffmpeg -i $source -y -vn -acodec libvorbis -aq 3 $dest', + 'wma': + 'ffmpeg -i $source -y -vn -acodec wmav2 -vn $dest', }, - u'max_bitrate': 500, - u'auto': False, - u'tmpdir': None, - u'quiet': False, - u'embed': True, - u'paths': {}, - u'no_convert': u'', - u'never_convert_lossy_files': False, - u'copy_album_art': False, - u'album_art_maxwidth': 0, + 'max_bitrate': 500, + 'auto': False, + 'tmpdir': None, + 'quiet': False, + 'embed': True, + 'paths': 
{}, + 'no_convert': '', + 'never_convert_lossy_files': False, + 'copy_album_art': False, + 'album_art_maxwidth': 0, + 'delete_originals': False, }) self.early_import_stages = [self.auto_convert] self.register_listener('import_task_files', self._cleanup) def commands(self): - cmd = ui.Subcommand('convert', help=u'convert to external location') + cmd = ui.Subcommand('convert', help='convert to external location') cmd.parser.add_option('-p', '--pretend', action='store_true', - help=u'show actions but do nothing') + help='show actions but do nothing') cmd.parser.add_option('-t', '--threads', action='store', type='int', - help=u'change the number of threads, \ + help='change the number of threads, \ defaults to maximum available processors') cmd.parser.add_option('-k', '--keep-new', action='store_true', - dest='keep_new', help=u'keep only the converted \ + dest='keep_new', help='keep only the converted \ and move the old files') cmd.parser.add_option('-d', '--dest', action='store', - help=u'set the destination directory') + help='set the destination directory') cmd.parser.add_option('-f', '--format', action='store', dest='format', - help=u'set the target format of the tracks') + help='set the target format of the tracks') cmd.parser.add_option('-y', '--yes', action='store_true', dest='yes', - help=u'do not ask for confirmation') + help='do not ask for confirmation') + cmd.parser.add_option('-l', '--link', action='store_true', dest='link', + help='symlink files that do not \ + need transcoding.') + cmd.parser.add_option('-H', '--hardlink', action='store_true', + dest='hardlink', + help='hardlink files that do not \ + need transcoding. 
Overrides --link.') cmd.parser.add_album_option() cmd.func = self.convert_func return [cmd] def auto_convert(self, config, task): if self.config['auto']: - for item in task.imported_items(): - self.convert_on_import(config.lib, item) + par_map(lambda item: self.convert_on_import(config.lib, item), + task.imported_items()) # Utilities converted from functions to methods on logging overhaul @@ -191,22 +199,11 @@ class ConvertPlugin(BeetsPlugin): quiet = self.config['quiet'].get(bool) if not quiet and not pretend: - self._log.info(u'Encoding {0}', util.displayable_path(source)) + self._log.info('Encoding {0}', util.displayable_path(source)) - # On Python 3, we need to construct the command to invoke as a - # Unicode string. On Unix, this is a little unfortunate---the OS is - # expecting bytes---so we use surrogate escaping and decode with the - # argument encoding, which is the same encoding that will then be - # *reversed* to recover the same bytes before invoking the OS. On - # Windows, we want to preserve the Unicode filename "as is." - if not six.PY2: - command = command.decode(util.arg_encoding(), 'surrogateescape') - if platform.system() == 'Windows': - source = source.decode(util._fsencoding()) - dest = dest.decode(util._fsencoding()) - else: - source = source.decode(util.arg_encoding(), 'surrogateescape') - dest = dest.decode(util.arg_encoding(), 'surrogateescape') + command = command.decode(arg_encoding(), 'surrogateescape') + source = decode_commandline_path(source) + dest = decode_commandline_path(dest) # Substitute $source and $dest in the argument list. 
args = shlex.split(command) @@ -216,22 +213,19 @@ class ConvertPlugin(BeetsPlugin): 'source': source, 'dest': dest, }) - if six.PY2: - encode_cmd.append(args[i]) - else: - encode_cmd.append(args[i].encode(util.arg_encoding())) + encode_cmd.append(args[i].encode(util.arg_encoding())) if pretend: - self._log.info(u'{0}', u' '.join(ui.decargs(args))) + self._log.info('{0}', ' '.join(ui.decargs(args))) return try: util.command_output(encode_cmd) except subprocess.CalledProcessError as exc: # Something went wrong (probably Ctrl+C), remove temporary files - self._log.info(u'Encoding {0} failed. Cleaning up...', + self._log.info('Encoding {0} failed. Cleaning up...', util.displayable_path(source)) - self._log.debug(u'Command {0} exited with status {1}: {2}', + self._log.debug('Command {0} exited with status {1}: {2}', args, exc.returncode, exc.output) @@ -240,17 +234,17 @@ class ConvertPlugin(BeetsPlugin): raise except OSError as exc: raise ui.UserError( - u"convert: couldn't invoke '{0}': {1}".format( - u' '.join(ui.decargs(args)), exc + "convert: couldn't invoke '{}': {}".format( + ' '.join(ui.decargs(args)), exc ) ) if not quiet and not pretend: - self._log.info(u'Finished encoding {0}', + self._log.info('Finished encoding {0}', util.displayable_path(source)) def convert_item(self, dest_dir, keep_new, path_formats, fmt, - pretend=False): + pretend=False, link=False, hardlink=False): """A pipeline thread that converts `Item` objects from a library. 
""" @@ -283,41 +277,60 @@ class ConvertPlugin(BeetsPlugin): util.mkdirall(dest) if os.path.exists(util.syspath(dest)): - self._log.info(u'Skipping {0} (target file exists)', + self._log.info('Skipping {0} (target file exists)', util.displayable_path(item.path)) continue if keep_new: if pretend: - self._log.info(u'mv {0} {1}', + self._log.info('mv {0} {1}', util.displayable_path(item.path), util.displayable_path(original)) else: - self._log.info(u'Moving to {0}', + self._log.info('Moving to {0}', util.displayable_path(original)) util.move(item.path, original) if should_transcode(item, fmt): + linked = False try: self.encode(command, original, converted, pretend) except subprocess.CalledProcessError: continue else: + linked = link or hardlink if pretend: - self._log.info(u'cp {0} {1}', + msg = 'ln' if hardlink else ('ln -s' if link else 'cp') + + self._log.info('{2} {0} {1}', util.displayable_path(original), - util.displayable_path(converted)) + util.displayable_path(converted), + msg) else: # No transcoding necessary. - self._log.info(u'Copying {0}', - util.displayable_path(item.path)) - util.copy(original, converted) + msg = 'Hardlinking' if hardlink \ + else ('Linking' if link else 'Copying') + + self._log.info('{1} {0}', + util.displayable_path(item.path), + msg) + + if hardlink: + util.hardlink(original, converted) + elif link: + util.link(original, converted) + else: + util.copy(original, converted) if pretend: continue + id3v23 = self.config['id3v23'].as_choice([True, False, 'inherit']) + if id3v23 == 'inherit': + id3v23 = None + # Write tags from the database to the converted file. - item.try_write(path=converted) + item.try_write(path=converted, id3v23=id3v23) if keep_new: # If we're keeping the transcoded file, read it again (after @@ -326,13 +339,13 @@ class ConvertPlugin(BeetsPlugin): item.read() item.store() # Store new path and audio data. 
- if self.config['embed']: - album = item.get_album() + if self.config['embed'] and not linked: + album = item._cached_album if album and album.artpath: - self._log.debug(u'embedding album art from {}', + self._log.debug('embedding album art from {}', util.displayable_path(album.artpath)) art.embed_item(self._log, item, album.artpath, - itempath=converted) + itempath=converted, id3v23=id3v23) if keep_new: plugins.send('after_convert', item=item, @@ -341,7 +354,8 @@ class ConvertPlugin(BeetsPlugin): plugins.send('after_convert', item=item, dest=converted, keepnew=False) - def copy_album_art(self, album, dest_dir, path_formats, pretend=False): + def copy_album_art(self, album, dest_dir, path_formats, pretend=False, + link=False, hardlink=False): """Copies or converts the associated cover art of the album. Album must have at least one track. """ @@ -369,7 +383,7 @@ class ConvertPlugin(BeetsPlugin): util.mkdirall(dest) if os.path.exists(util.syspath(dest)): - self._log.info(u'Skipping {0} (target file exists)', + self._log.info('Skipping {0} (target file exists)', util.displayable_path(album.artpath)) return @@ -383,31 +397,43 @@ class ConvertPlugin(BeetsPlugin): if size: resize = size[0] > maxwidth else: - self._log.warning(u'Could not get size of image (please see ' - u'documentation for dependencies).') + self._log.warning('Could not get size of image (please see ' + 'documentation for dependencies).') # Either copy or resize (while copying) the image. 
if resize: - self._log.info(u'Resizing cover art from {0} to {1}', + self._log.info('Resizing cover art from {0} to {1}', util.displayable_path(album.artpath), util.displayable_path(dest)) if not pretend: ArtResizer.shared.resize(maxwidth, album.artpath, dest) else: if pretend: - self._log.info(u'cp {0} {1}', + msg = 'ln' if hardlink else ('ln -s' if link else 'cp') + + self._log.info('{2} {0} {1}', util.displayable_path(album.artpath), - util.displayable_path(dest)) + util.displayable_path(dest), + msg) else: - self._log.info(u'Copying cover art to {0}', + msg = 'Hardlinking' if hardlink \ + else ('Linking' if link else 'Copying') + + self._log.info('{2} cover art from {0} to {1}', util.displayable_path(album.artpath), - util.displayable_path(dest)) - util.copy(album.artpath, dest) + util.displayable_path(dest), + msg) + if hardlink: + util.hardlink(album.artpath, dest) + elif link: + util.link(album.artpath, dest) + else: + util.copy(album.artpath, dest) def convert_func(self, lib, opts, args): dest = opts.dest or self.config['dest'].get() if not dest: - raise ui.UserError(u'no convert destination set') + raise ui.UserError('no convert destination set') dest = util.bytestring_path(dest) threads = opts.threads or self.config['threads'].get(int) @@ -421,33 +447,46 @@ class ConvertPlugin(BeetsPlugin): else: pretend = self.config['pretend'].get(bool) + if opts.hardlink is not None: + hardlink = opts.hardlink + link = False + elif opts.link is not None: + hardlink = False + link = opts.link + else: + hardlink = self.config['hardlink'].get(bool) + link = self.config['link'].get(bool) + if opts.album: albums = lib.albums(ui.decargs(args)) items = [i for a in albums for i in a.items()] if not pretend: for a in albums: - ui.print_(format(a, u'')) + ui.print_(format(a, '')) else: items = list(lib.items(ui.decargs(args))) if not pretend: for i in items: - ui.print_(format(i, u'')) + ui.print_(format(i, '')) if not items: - self._log.error(u'Empty query result.') + 
self._log.error('Empty query result.') return - if not (pretend or opts.yes or ui.input_yn(u"Convert? (Y/n)")): + if not (pretend or opts.yes or ui.input_yn("Convert? (Y/n)")): return if opts.album and self.config['copy_album_art']: for album in albums: - self.copy_album_art(album, dest, path_formats, pretend) + self.copy_album_art(album, dest, path_formats, pretend, + link, hardlink) convert = [self.convert_item(dest, opts.keep_new, path_formats, fmt, - pretend) + pretend, + link, + hardlink) for _ in range(threads)] pipe = util.pipeline.Pipeline([iter(items), convert]) pipe.run_parallel() @@ -477,11 +516,16 @@ class ConvertPlugin(BeetsPlugin): # Change the newly-imported database entry to point to the # converted file. + source_path = item.path item.path = dest item.write() item.read() # Load new audio information data. item.store() + if self.config['delete_originals']: + self._log.info('Removing original file {0}', source_path) + util.remove(source_path, False) + def _cleanup(self, task, session): for path in task.old_paths: if path in _temp_files: diff --git a/libs/common/beetsplug/cue.py b/libs/common/beetsplug/cue.py deleted file mode 100644 index fd564b55..00000000 --- a/libs/common/beetsplug/cue.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2016 Bruno Cauet -# Split an album-file in tracks thanks a cue file - -from __future__ import division, absolute_import, print_function - -import subprocess -from os import path -from glob import glob - -from beets.util import command_output, displayable_path -from beets.plugins import BeetsPlugin -from beets.autotag import TrackInfo - - -class CuePlugin(BeetsPlugin): - def __init__(self): - super(CuePlugin, self).__init__() - # this does not seem supported by shnsplit - self.config.add({ - 'keep_before': .1, - 'keep_after': .9, - }) - - # self.register_listener('import_task_start', self.look_for_cues) - - def candidates(self, items, artist, album, va_likely): - import pdb - pdb.set_trace() - - 
def item_candidates(self, item, artist, album): - dir = path.dirname(item.path) - cues = glob.glob(path.join(dir, "*.cue")) - if not cues: - return - if len(cues) > 1: - self._log.info(u"Found multiple cue files doing nothing: {0}", - list(map(displayable_path, cues))) - - cue_file = cues[0] - self._log.info("Found {} for {}", displayable_path(cue_file), item) - - try: - # careful: will ask for input in case of conflicts - command_output(['shnsplit', '-f', cue_file, item.path]) - except (subprocess.CalledProcessError, OSError): - self._log.exception(u'shnsplit execution failed') - return - - tracks = glob(path.join(dir, "*.wav")) - self._log.info("Generated {0} tracks", len(tracks)) - for t in tracks: - title = "dunno lol" - track_id = "wtf" - index = int(path.basename(t)[len("split-track"):-len(".wav")]) - yield TrackInfo(title, track_id, index=index, artist=artist) - # generate TrackInfo instances diff --git a/libs/common/beetsplug/deezer.py b/libs/common/beetsplug/deezer.py new file mode 100644 index 00000000..5f158f93 --- /dev/null +++ b/libs/common/beetsplug/deezer.py @@ -0,0 +1,230 @@ +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Adds Deezer release and track search support to the autotagger +""" + +import collections + +import unidecode +import requests + +from beets import ui +from beets.autotag import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin + + +class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): + data_source = 'Deezer' + + # Base URLs for the Deezer API + # Documentation: https://developers.deezer.com/api/ + search_url = 'https://api.deezer.com/search/' + album_url = 'https://api.deezer.com/album/' + track_url = 'https://api.deezer.com/track/' + + id_regex = { + 'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)', + 'match_group': 4, + } + + def __init__(self): + super().__init__() + + def album_for_id(self, album_id): + """Fetch an album by its Deezer ID or URL and return an + AlbumInfo object or None if the album is not found. + + :param album_id: Deezer ID or URL for the album. + :type album_id: str + :return: AlbumInfo object for album. + :rtype: beets.autotag.hooks.AlbumInfo or None + """ + deezer_id = self._get_id('album', album_id) + if deezer_id is None: + return None + + album_data = requests.get(self.album_url + deezer_id).json() + artist, artist_id = self.get_artist(album_data['contributors']) + + release_date = album_data['release_date'] + date_parts = [int(part) for part in release_date.split('-')] + num_date_parts = len(date_parts) + + if num_date_parts == 3: + year, month, day = date_parts + elif num_date_parts == 2: + year, month = date_parts + day = None + elif num_date_parts == 1: + year = date_parts[0] + month = None + day = None + else: + raise ui.UserError( + "Invalid `release_date` returned " + "by {} API: '{}'".format(self.data_source, release_date) + ) + + tracks_data = requests.get( + self.album_url + deezer_id + '/tracks' + ).json()['data'] + if not tracks_data: + return None + tracks = [] + medium_totals = collections.defaultdict(int) + for i, track_data in enumerate(tracks_data, start=1): + track = 
self._get_track(track_data) + track.index = i + medium_totals[track.medium] += 1 + tracks.append(track) + for track in tracks: + track.medium_total = medium_totals[track.medium] + + return AlbumInfo( + album=album_data['title'], + album_id=deezer_id, + artist=artist, + artist_credit=self.get_artist([album_data['artist']])[0], + artist_id=artist_id, + tracks=tracks, + albumtype=album_data['record_type'], + va=len(album_data['contributors']) == 1 + and artist.lower() == 'various artists', + year=year, + month=month, + day=day, + label=album_data['label'], + mediums=max(medium_totals.keys()), + data_source=self.data_source, + data_url=album_data['link'], + ) + + def _get_track(self, track_data): + """Convert a Deezer track object dict to a TrackInfo object. + + :param track_data: Deezer Track object dict + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo + """ + artist, artist_id = self.get_artist( + track_data.get('contributors', [track_data['artist']]) + ) + return TrackInfo( + title=track_data['title'], + track_id=track_data['id'], + artist=artist, + artist_id=artist_id, + length=track_data['duration'], + index=track_data['track_position'], + medium=track_data['disk_number'], + medium_index=track_data['track_position'], + data_source=self.data_source, + data_url=track_data['link'], + ) + + def track_for_id(self, track_id=None, track_data=None): + """Fetch a track by its Deezer ID or URL and return a + TrackInfo object or None if the track is not found. + + :param track_id: (Optional) Deezer ID or URL for the track. Either + ``track_id`` or ``track_data`` must be provided. + :type track_id: str + :param track_data: (Optional) Simplified track object dict. May be + provided instead of ``track_id`` to avoid unnecessary API calls. 
+ :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo or None + """ + if track_data is None: + deezer_id = self._get_id('track', track_id) + if deezer_id is None: + return None + track_data = requests.get(self.track_url + deezer_id).json() + track = self._get_track(track_data) + + # Get album's tracks to set `track.index` (position on the entire + # release) and `track.medium_total` (total number of tracks on + # the track's disc). + album_tracks_data = requests.get( + self.album_url + str(track_data['album']['id']) + '/tracks' + ).json()['data'] + medium_total = 0 + for i, track_data in enumerate(album_tracks_data, start=1): + if track_data['disk_number'] == track.medium: + medium_total += 1 + if track_data['id'] == track.track_id: + track.index = i + track.medium_total = medium_total + return track + + @staticmethod + def _construct_search_query(filters=None, keywords=''): + """Construct a query string with the specified filters and keywords to + be provided to the Deezer Search API + (https://developers.deezer.com/api/search). + + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: Query string to be provided to the Search API. + :rtype: str + """ + query_components = [ + keywords, + ' '.join(f'{k}:"{v}"' for k, v in filters.items()), + ] + query = ' '.join([q for q in query_components if q]) + if not isinstance(query, str): + query = query.decode('utf8') + return unidecode.unidecode(query) + + def _search_api(self, query_type, filters=None, keywords=''): + """Query the Deezer Search API for the specified ``keywords``, applying + the provided ``filters``. + + :param query_type: The Deezer Search API method to use. Valid types + are: 'album', 'artist', 'history', 'playlist', 'podcast', + 'radio', 'track', 'user', and 'track'. + :type query_type: str + :param filters: (Optional) Field filters to apply. 
+ :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: JSON data for the class:`Response ` object or None + if no search results are returned. + :rtype: dict or None + """ + query = self._construct_search_query( + keywords=keywords, filters=filters + ) + if not query: + return None + self._log.debug( + f"Searching {self.data_source} for '{query}'" + ) + response = requests.get( + self.search_url + query_type, params={'q': query} + ) + response.raise_for_status() + response_data = response.json().get('data', []) + self._log.debug( + "Found {} result(s) from {} for '{}'", + len(response_data), + self.data_source, + query, + ) + return response_data diff --git a/libs/common/beetsplug/discogs.py b/libs/common/beetsplug/discogs.py index eeb87d31..d015e420 100644 --- a/libs/common/beetsplug/discogs.py +++ b/libs/common/beetsplug/discogs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,19 +13,18 @@ # included in all copies or substantial portions of the Software. """Adds Discogs album search support to the autotagger. Requires the -discogs-client library. +python3-discogs-client library. 
""" -from __future__ import division, absolute_import, print_function import beets.ui from beets import config -from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance -from beets.plugins import BeetsPlugin -from beets.util import confit +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance +import confuse from discogs_client import Release, Master, Client from discogs_client.exceptions import DiscogsAPIError from requests.exceptions import ConnectionError -from six.moves import http_client +import http.client import beets import re import time @@ -37,10 +35,12 @@ import traceback from string import ascii_lowercase -USER_AGENT = u'beets/{0} +http://beets.io/'.format(beets.__version__) +USER_AGENT = f'beets/{beets.__version__} +https://beets.io/' +API_KEY = 'rAzVUQYRaoFjeBjyWuWZ' +API_SECRET = 'plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy' # Exceptions that discogs_client should really handle but does not. -CONNECTION_ERRORS = (ConnectionError, socket.error, http_client.HTTPException, +CONNECTION_ERRORS = (ConnectionError, socket.error, http.client.HTTPException, ValueError, # JSON decoding raises a ValueError. DiscogsAPIError) @@ -48,13 +48,15 @@ CONNECTION_ERRORS = (ConnectionError, socket.error, http_client.HTTPException, class DiscogsPlugin(BeetsPlugin): def __init__(self): - super(DiscogsPlugin, self).__init__() + super().__init__() self.config.add({ - 'apikey': 'rAzVUQYRaoFjeBjyWuWZ', - 'apisecret': 'plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy', + 'apikey': API_KEY, + 'apisecret': API_SECRET, 'tokenfile': 'discogs_token.json', 'source_weight': 0.5, 'user_token': '', + 'separator': ', ', + 'index_tracks': False, }) self.config['apikey'].redact = True self.config['apisecret'].redact = True @@ -71,6 +73,8 @@ class DiscogsPlugin(BeetsPlugin): # Try using a configured user token (bypassing OAuth login). 
user_token = self.config['user_token'].as_str() if user_token: + # The rate limit for authenticated users goes up to 60 + # requests per minute. self.discogs_client = Client(USER_AGENT, user_token=user_token) return @@ -78,7 +82,7 @@ class DiscogsPlugin(BeetsPlugin): try: with open(self._tokenfile()) as f: tokendata = json.load(f) - except IOError: + except OSError: # No token yet. Generate one. token, secret = self.authenticate(c_key, c_secret) else: @@ -97,7 +101,7 @@ class DiscogsPlugin(BeetsPlugin): def _tokenfile(self): """Get the path to the JSON file for storing the OAuth token. """ - return self.config['tokenfile'].get(confit.Filename(in_app_dir=True)) + return self.config['tokenfile'].get(confuse.Filename(in_app_dir=True)) def authenticate(self, c_key, c_secret): # Get the link for the OAuth page. @@ -105,24 +109,24 @@ class DiscogsPlugin(BeetsPlugin): try: _, _, url = auth_client.get_authorize_url() except CONNECTION_ERRORS as e: - self._log.debug(u'connection error: {0}', e) - raise beets.ui.UserError(u'communication with Discogs failed') + self._log.debug('connection error: {0}', e) + raise beets.ui.UserError('communication with Discogs failed') - beets.ui.print_(u"To authenticate with Discogs, visit:") + beets.ui.print_("To authenticate with Discogs, visit:") beets.ui.print_(url) # Ask for the code and validate it. - code = beets.ui.input_(u"Enter the code:") + code = beets.ui.input_("Enter the code:") try: token, secret = auth_client.get_access_token(code) except DiscogsAPIError: - raise beets.ui.UserError(u'Discogs authorization failed') + raise beets.ui.UserError('Discogs authorization failed') except CONNECTION_ERRORS as e: - self._log.debug(u'connection error: {0}', e) - raise beets.ui.UserError(u'Discogs token request failed') + self._log.debug('connection error: {0}', e) + raise beets.ui.UserError('Discogs token request failed') # Save the token for later use. 
- self._log.debug(u'Discogs token {0}, secret {1}', token, secret) + self._log.debug('Discogs token {0}, secret {1}', token, secret) with open(self._tokenfile(), 'w') as f: json.dump({'token': token, 'secret': secret}, f) @@ -131,12 +135,22 @@ class DiscogsPlugin(BeetsPlugin): def album_distance(self, items, album_info, mapping): """Returns the album distance. """ - dist = Distance() - if album_info.data_source == 'Discogs': - dist.add('source', self.config['source_weight'].as_number()) - return dist + return get_distance( + data_source='Discogs', + info=album_info, + config=self.config + ) - def candidates(self, items, artist, album, va_likely): + def track_distance(self, item, track_info): + """Returns the track distance. + """ + return get_distance( + data_source='Discogs', + info=track_info, + config=self.config + ) + + def candidates(self, items, artist, album, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for discogs search results matching an album and artist (if not various). """ @@ -146,20 +160,45 @@ class DiscogsPlugin(BeetsPlugin): if va_likely: query = album else: - query = '%s %s' % (artist, album) + query = f'{artist} {album}' try: return self.get_albums(query) except DiscogsAPIError as e: - self._log.debug(u'API Error: {0} (query: {1})', e, query) + self._log.debug('API Error: {0} (query: {1})', e, query) if e.status_code == 401: self.reset_auth() return self.candidates(items, artist, album, va_likely) else: return [] except CONNECTION_ERRORS: - self._log.debug(u'Connection error in album search', exc_info=True) + self._log.debug('Connection error in album search', exc_info=True) return [] + @staticmethod + def extract_release_id_regex(album_id): + """Returns the Discogs_id or None.""" + # Discogs-IDs are simple integers. 
In order to avoid confusion with + # other metadata plugins, we only look for very specific formats of the + # input string: + # - plain integer, optionally wrapped in brackets and prefixed by an + # 'r', as this is how discogs displays the release ID on its webpage. + # - legacy url format: discogs.com//release/ + # - current url format: discogs.com/release/- + # See #291, #4080 and #4085 for the discussions leading up to these + # patterns. + # Regex has been tested here https://regex101.com/r/wyLdB4/2 + + for pattern in [ + r'^\[?r?(?P\d+)\]?$', + r'discogs\.com/release/(?P\d+)-', + r'discogs\.com/[^/]+/release/(?P\d+)', + ]: + match = re.search(pattern, album_id) + if match: + return int(match.group('id')) + + return None + def album_for_id(self, album_id): """Fetches an album by its Discogs ID and returns an AlbumInfo object or None if the album is not found. @@ -167,28 +206,28 @@ class DiscogsPlugin(BeetsPlugin): if not self.discogs_client: return - self._log.debug(u'Searching for release {0}', album_id) - # Discogs-IDs are simple integers. We only look for those at the end - # of an input string as to avoid confusion with other metadata plugins. - # An optional bracket can follow the integer, as this is how discogs - # displays the release ID on its webpage. 
- match = re.search(r'(^|\[*r|discogs\.com/.+/release/)(\d+)($|\])', - album_id) - if not match: + self._log.debug('Searching for release {0}', album_id) + + discogs_id = self.extract_release_id_regex(album_id) + + if not discogs_id: return None - result = Release(self.discogs_client, {'id': int(match.group(2))}) + + result = Release(self.discogs_client, {'id': discogs_id}) # Try to obtain title to verify that we indeed have a valid Release try: getattr(result, 'title') except DiscogsAPIError as e: if e.status_code != 404: - self._log.debug(u'API Error: {0} (query: {1})', e, result._uri) + self._log.debug('API Error: {0} (query: {1})', e, + result.data['resource_url']) if e.status_code == 401: self.reset_auth() return self.album_for_id(album_id) return None except CONNECTION_ERRORS: - self._log.debug(u'Connection error in album lookup', exc_info=True) + self._log.debug('Connection error in album lookup', + exc_info=True) return None return self.get_album_info(result) @@ -199,18 +238,17 @@ class DiscogsPlugin(BeetsPlugin): # cause a query to return no results, even if they match the artist or # album title. Use `re.UNICODE` flag to avoid stripping non-english # word characters. - # FIXME: Encode as ASCII to work around a bug: - # https://github.com/beetbox/beets/issues/1051 - # When the library is fixed, we should encode as UTF-8. - query = re.sub(r'(?u)\W+', ' ', query).encode('ascii', "replace") + query = re.sub(r'(?u)\W+', ' ', query) # Strip medium information from query, Things like "CD1" and "disk 1" # can also negate an otherwise positive result. 
- query = re.sub(br'(?i)\b(CD|disc)\s*\d+', b'', query) + query = re.sub(r'(?i)\b(CD|disc)\s*\d+', '', query) + try: releases = self.discogs_client.search(query, type='release').page(1) + except CONNECTION_ERRORS: - self._log.debug(u"Communication error while searching for {0!r}", + self._log.debug("Communication error while searching for {0!r}", query, exc_info=True) return [] return [album for album in map(self.get_album_info, releases[:5]) @@ -220,20 +258,22 @@ class DiscogsPlugin(BeetsPlugin): """Fetches a master release given its Discogs ID and returns its year or None if the master release is not found. """ - self._log.debug(u'Searching for master release {0}', master_id) + self._log.debug('Searching for master release {0}', master_id) result = Master(self.discogs_client, {'id': master_id}) + try: year = result.fetch('year') return year except DiscogsAPIError as e: if e.status_code != 404: - self._log.debug(u'API Error: {0} (query: {1})', e, result._uri) + self._log.debug('API Error: {0} (query: {1})', e, + result.data['resource_url']) if e.status_code == 401: self.reset_auth() return self.get_master_year(master_id) return None except CONNECTION_ERRORS: - self._log.debug(u'Connection error in master release lookup', + self._log.debug('Connection error in master release lookup', exc_info=True) return None @@ -252,10 +292,12 @@ class DiscogsPlugin(BeetsPlugin): # https://www.discogs.com/help/doc/submission-guidelines-general-rules if not all([result.data.get(k) for k in ['artists', 'title', 'id', 'tracklist']]): - self._log.warn(u"Release does not contain the required fields") + self._log.warning("Release does not contain the required fields") return None - artist, artist_id = self.get_artist([a.data for a in result.artists]) + artist, artist_id = MetadataSourcePlugin.get_artist( + [a.data for a in result.artists] + ) album = re.sub(r' +', ' ', result.title) album_id = result.data['id'] # Use `.data` to access the tracklist directly instead of the @@ -270,10 
+312,13 @@ class DiscogsPlugin(BeetsPlugin): mediums = [t.medium for t in tracks] country = result.data.get('country') data_url = result.data.get('uri') + style = self.format(result.data.get('styles')) + genre = self.format(result.data.get('genres')) + discogs_albumid = self.extract_release_id(result.data.get('uri')) # Extract information for the optional AlbumInfo fields that are # contained on nested discogs fields. - albumtype = media = label = catalogno = None + albumtype = media = label = catalogno = labelid = None if result.data.get('formats'): albumtype = ', '.join( result.data['formats'][0].get('descriptions', [])) or None @@ -281,12 +326,13 @@ class DiscogsPlugin(BeetsPlugin): if result.data.get('labels'): label = result.data['labels'][0].get('name') catalogno = result.data['labels'][0].get('catno') + labelid = result.data['labels'][0].get('id') # Additional cleanups (various artists name, catalog number, media). if va: artist = config['va_name'].as_str() if catalogno == 'none': - catalogno = None + catalogno = None # Explicitly set the `media` for the tracks, since it is expected by # `autotag.apply_metadata`, and set `medium_total`. for track in tracks: @@ -302,36 +348,29 @@ class DiscogsPlugin(BeetsPlugin): # a master release, otherwise fetch the master release. 
original_year = self.get_master_year(master_id) if master_id else year - return AlbumInfo(album, album_id, artist, artist_id, tracks, asin=None, - albumtype=albumtype, va=va, year=year, month=None, - day=None, label=label, mediums=len(set(mediums)), - artist_sort=None, releasegroup_id=master_id, - catalognum=catalogno, script=None, language=None, - country=country, albumstatus=None, media=media, - albumdisambig=None, artist_credit=None, - original_year=original_year, original_month=None, - original_day=None, data_source='Discogs', - data_url=data_url) + return AlbumInfo(album=album, album_id=album_id, artist=artist, + artist_id=artist_id, tracks=tracks, + albumtype=albumtype, va=va, year=year, + label=label, mediums=len(set(mediums)), + releasegroup_id=master_id, catalognum=catalogno, + country=country, style=style, genre=genre, + media=media, original_year=original_year, + data_source='Discogs', data_url=data_url, + discogs_albumid=discogs_albumid, + discogs_labelid=labelid, discogs_artistid=artist_id) - def get_artist(self, artists): - """Returns an artist string (all artists) and an artist_id (the main - artist) for a list of discogs album or track artists. - """ - artist_id = None - bits = [] - for i, artist in enumerate(artists): - if not artist_id: - artist_id = artist['id'] - name = artist['name'] - # Strip disambiguation number. - name = re.sub(r' \(\d+\)$', '', name) - # Move articles to the front. 
- name = re.sub(r'(?i)^(.*?), (a|an|the)$', r'\2 \1', name) - bits.append(name) - if artist['join'] and i < len(artists) - 1: - bits.append(artist['join']) - artist = ' '.join(bits).replace(' ,', ',') or None - return artist, artist_id + def format(self, classification): + if classification: + return self.config['separator'].as_str() \ + .join(sorted(classification)) + else: + return None + + def extract_release_id(self, uri): + if uri: + return uri.split("/")[-1] + else: + return None def get_tracks(self, tracklist): """Returns a list of TrackInfo objects for a discogs tracklist. @@ -342,20 +381,34 @@ class DiscogsPlugin(BeetsPlugin): # FIXME: this is an extra precaution for making sure there are no # side effects after #2222. It should be removed after further # testing. - self._log.debug(u'{}', traceback.format_exc()) - self._log.error(u'uncaught exception in coalesce_tracks: {}', exc) + self._log.debug('{}', traceback.format_exc()) + self._log.error('uncaught exception in coalesce_tracks: {}', exc) clean_tracklist = tracklist tracks = [] index_tracks = {} index = 0 + # Distinct works and intra-work divisions, as defined by index tracks. + divisions, next_divisions = [], [] for track in clean_tracklist: # Only real tracks have `position`. Otherwise, it's an index track. if track['position']: index += 1 - track_info = self.get_track_info(track, index) + if next_divisions: + # End of a block of index tracks: update the current + # divisions. + divisions += next_divisions + del next_divisions[:] + track_info = self.get_track_info(track, index, divisions) track_info.track_alt = track['position'] tracks.append(track_info) else: + next_divisions.append(track['title']) + # We expect new levels of division at the beginning of the + # tracklist (and possibly elsewhere). + try: + divisions.pop() + except IndexError: + pass index_tracks[index + 1] = track['title'] # Fix up medium and medium_index for each track. 
Discogs position is @@ -367,7 +420,7 @@ class DiscogsPlugin(BeetsPlugin): # If a medium has two sides (ie. vinyl or cassette), each pair of # consecutive sides should belong to the same medium. if all([track.medium is not None for track in tracks]): - m = sorted(set([track.medium.lower() for track in tracks])) + m = sorted({track.medium.lower() for track in tracks}) # If all track.medium are single consecutive letters, assume it is # a 2-sided medium. if ''.join(m) in ascii_lowercase: @@ -426,7 +479,7 @@ class DiscogsPlugin(BeetsPlugin): # Calculate position based on first subtrack, without subindex. idx, medium_idx, sub_idx = \ self.get_track_index(subtracks[0]['position']) - position = '%s%s' % (idx or '', medium_idx or '') + position = '{}{}'.format(idx or '', medium_idx or '') if tracklist and not tracklist[-1]['position']: # Assume the previous index track contains the track title. @@ -444,6 +497,12 @@ class DiscogsPlugin(BeetsPlugin): for subtrack in subtracks: if not subtrack.get('artists'): subtrack['artists'] = index_track['artists'] + # Concatenate index with track title when index_tracks + # option is set + if self.config['index_tracks']: + for subtrack in subtracks: + subtrack['title'] = '{}: {}'.format( + index_track['title'], subtrack['title']) tracklist.extend(subtracks) else: # Merge the subtracks, pick a title, and append the new track. @@ -490,18 +549,23 @@ class DiscogsPlugin(BeetsPlugin): return tracklist - def get_track_info(self, track, index): + def get_track_info(self, track, index, divisions): """Returns a TrackInfo object for a discogs track. 
""" title = track['title'] + if self.config['index_tracks']: + prefix = ', '.join(divisions) + if prefix: + title = f'{prefix}: {title}' track_id = None medium, medium_index, _ = self.get_track_index(track['position']) - artist, artist_id = self.get_artist(track.get('artists', [])) + artist, artist_id = MetadataSourcePlugin.get_artist( + track.get('artists', []) + ) length = self.get_track_length(track['duration']) - return TrackInfo(title, track_id, artist=artist, artist_id=artist_id, - length=length, index=index, - medium=medium, medium_index=medium_index, - artist_sort=None, disctitle=None, artist_credit=None) + return TrackInfo(title=title, track_id=track_id, artist=artist, + artist_id=artist_id, length=length, index=index, + medium=medium, medium_index=medium_index) def get_track_index(self, position): """Returns the medium, medium index and subtrack index for a discogs @@ -528,7 +592,7 @@ class DiscogsPlugin(BeetsPlugin): if subindex and subindex.startswith('.'): subindex = subindex[1:] else: - self._log.debug(u'Invalid position: {0}', position) + self._log.debug('Invalid position: {0}', position) medium = index = subindex = None return medium or None, index or None, subindex or None diff --git a/libs/common/beetsplug/duplicates.py b/libs/common/beetsplug/duplicates.py index b316cfda..fdd5c175 100644 --- a/libs/common/beetsplug/duplicates.py +++ b/libs/common/beetsplug/duplicates.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Pedro Silva. # @@ -15,16 +14,15 @@ """List duplicate tracks or albums. 
""" -from __future__ import division, absolute_import, print_function import shlex from beets.plugins import BeetsPlugin from beets.ui import decargs, print_, Subcommand, UserError from beets.util import command_output, displayable_path, subprocess, \ - bytestring_path, MoveOperation + bytestring_path, MoveOperation, decode_commandline_path from beets.library import Item, Album -import six + PLUGIN = 'duplicates' @@ -33,7 +31,7 @@ class DuplicatesPlugin(BeetsPlugin): """List duplicate tracks or albums """ def __init__(self): - super(DuplicatesPlugin, self).__init__() + super().__init__() self.config.add({ 'album': False, @@ -56,54 +54,54 @@ class DuplicatesPlugin(BeetsPlugin): help=__doc__, aliases=['dup']) self._command.parser.add_option( - u'-c', u'--count', dest='count', + '-c', '--count', dest='count', action='store_true', - help=u'show duplicate counts', + help='show duplicate counts', ) self._command.parser.add_option( - u'-C', u'--checksum', dest='checksum', + '-C', '--checksum', dest='checksum', action='store', metavar='PROG', - help=u'report duplicates based on arbitrary command', + help='report duplicates based on arbitrary command', ) self._command.parser.add_option( - u'-d', u'--delete', dest='delete', + '-d', '--delete', dest='delete', action='store_true', - help=u'delete items from library and disk', + help='delete items from library and disk', ) self._command.parser.add_option( - u'-F', u'--full', dest='full', + '-F', '--full', dest='full', action='store_true', - help=u'show all versions of duplicate tracks or albums', + help='show all versions of duplicate tracks or albums', ) self._command.parser.add_option( - u'-s', u'--strict', dest='strict', + '-s', '--strict', dest='strict', action='store_true', - help=u'report duplicates only if all attributes are set', + help='report duplicates only if all attributes are set', ) self._command.parser.add_option( - u'-k', u'--key', dest='keys', + '-k', '--key', dest='keys', action='append', metavar='KEY', - 
help=u'report duplicates based on keys (use multiple times)', + help='report duplicates based on keys (use multiple times)', ) self._command.parser.add_option( - u'-M', u'--merge', dest='merge', + '-M', '--merge', dest='merge', action='store_true', - help=u'merge duplicate items', + help='merge duplicate items', ) self._command.parser.add_option( - u'-m', u'--move', dest='move', + '-m', '--move', dest='move', action='store', metavar='DEST', - help=u'move items to dest', + help='move items to dest', ) self._command.parser.add_option( - u'-o', u'--copy', dest='copy', + '-o', '--copy', dest='copy', action='store', metavar='DEST', - help=u'copy items to dest', + help='copy items to dest', ) self._command.parser.add_option( - u'-t', u'--tag', dest='tag', + '-t', '--tag', dest='tag', action='store', - help=u'tag matched items with \'k=v\' attribute', + help='tag matched items with \'k=v\' attribute', ) self._command.parser.add_all_common_options() @@ -135,16 +133,21 @@ class DuplicatesPlugin(BeetsPlugin): keys = ['mb_trackid', 'mb_albumid'] items = lib.items(decargs(args)) + # If there's nothing to do, return early. The code below assumes + # `items` to be non-empty. + if not items: + return + if path: - fmt = u'$path' + fmt = '$path' # Default format string for count mode. if count and not fmt: if album: - fmt = u'$albumartist - $album' + fmt = '$albumartist - $album' else: - fmt = u'$albumartist - $album - $title' - fmt += u': {0}' + fmt = '$albumartist - $album - $title' + fmt += ': {0}' if checksum: for i in items: @@ -170,7 +173,7 @@ class DuplicatesPlugin(BeetsPlugin): return [self._command] def _process_item(self, item, copy=False, move=False, delete=False, - tag=False, fmt=u''): + tag=False, fmt=''): """Process Item `item`. 
""" print_(format(item, fmt)) @@ -187,7 +190,7 @@ class DuplicatesPlugin(BeetsPlugin): k, v = tag.split('=') except Exception: raise UserError( - u"{}: can't parse k=v tag: {}".format(PLUGIN, tag) + f"{PLUGIN}: can't parse k=v tag: {tag}" ) setattr(item, k, v) item.store() @@ -197,25 +200,26 @@ class DuplicatesPlugin(BeetsPlugin): output as flexattr on a key that is the name of the program, and return the key, checksum tuple. """ - args = [p.format(file=item.path) for p in shlex.split(prog)] + args = [p.format(file=decode_commandline_path(item.path)) + for p in shlex.split(prog)] key = args[0] checksum = getattr(item, key, False) if not checksum: - self._log.debug(u'key {0} on item {1} not cached:' - u'computing checksum', + self._log.debug('key {0} on item {1} not cached:' + 'computing checksum', key, displayable_path(item.path)) try: - checksum = command_output(args) + checksum = command_output(args).stdout setattr(item, key, checksum) item.store() - self._log.debug(u'computed checksum for {0} using {1}', + self._log.debug('computed checksum for {0} using {1}', item.title, key) except subprocess.CalledProcessError as e: - self._log.debug(u'failed to checksum {0}: {1}', + self._log.debug('failed to checksum {0}: {1}', displayable_path(item.path), e) else: - self._log.debug(u'key {0} on item {1} cached:' - u'not computing checksum', + self._log.debug('key {0} on item {1} cached:' + 'not computing checksum', key, displayable_path(item.path)) return key, checksum @@ -231,12 +235,12 @@ class DuplicatesPlugin(BeetsPlugin): values = [getattr(obj, k, None) for k in keys] values = [v for v in values if v not in (None, '')] if strict and len(values) < len(keys): - self._log.debug(u'some keys {0} on item {1} are null or empty:' - u' skipping', + self._log.debug('some keys {0} on item {1} are null or empty:' + ' skipping', keys, displayable_path(obj.path)) elif (not strict and not len(values)): - self._log.debug(u'all keys {0} on item {1} are null or empty:' - u' skipping', 
+ self._log.debug('all keys {0} on item {1} are null or empty:' + ' skipping', keys, displayable_path(obj.path)) else: key = tuple(values) @@ -264,7 +268,7 @@ class DuplicatesPlugin(BeetsPlugin): # between a bytes object and the empty Unicode # string ''. return v is not None and \ - (v != '' if isinstance(v, six.text_type) else True) + (v != '' if isinstance(v, str) else True) fields = Item.all_keys() key = lambda x: sum(1 for f in fields if truthy(getattr(x, f))) else: @@ -284,8 +288,8 @@ class DuplicatesPlugin(BeetsPlugin): if getattr(objs[0], f, None) in (None, ''): value = getattr(o, f, None) if value: - self._log.debug(u'key {0} on item {1} is null ' - u'or empty: setting from item {2}', + self._log.debug('key {0} on item {1} is null ' + 'or empty: setting from item {2}', f, displayable_path(objs[0].path), displayable_path(o.path)) setattr(objs[0], f, value) @@ -305,8 +309,8 @@ class DuplicatesPlugin(BeetsPlugin): missing = Item.from_path(i.path) missing.album_id = objs[0].id missing.add(i._db) - self._log.debug(u'item {0} missing from album {1}:' - u' merging from {2} into {3}', + self._log.debug('item {0} missing from album {1}:' + ' merging from {2} into {3}', missing, objs[0], displayable_path(o.path), diff --git a/libs/common/beetsplug/edit.py b/libs/common/beetsplug/edit.py index 631a1b58..6f03fa4d 100644 --- a/libs/common/beetsplug/edit.py +++ b/libs/common/beetsplug/edit.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016 # @@ -15,7 +14,6 @@ """Open metadata information in a text editor to let the user edit it. 
""" -from __future__ import division, absolute_import, print_function from beets import plugins from beets import util @@ -28,7 +26,7 @@ import subprocess import yaml from tempfile import NamedTemporaryFile import os -import six +import shlex # These "safe" types can avoid the format/parse cycle that most fields go @@ -45,13 +43,13 @@ class ParseError(Exception): def edit(filename, log): """Open `filename` in a text editor. """ - cmd = util.shlex_split(util.editor_command()) + cmd = shlex.split(util.editor_command()) cmd.append(filename) - log.debug(u'invoking editor command: {!r}', cmd) + log.debug('invoking editor command: {!r}', cmd) try: subprocess.call(cmd) except OSError as exc: - raise ui.UserError(u'could not run editor command {!r}: {}'.format( + raise ui.UserError('could not run editor command {!r}: {}'.format( cmd[0], exc )) @@ -74,20 +72,20 @@ def load(s): """ try: out = [] - for d in yaml.load_all(s): + for d in yaml.safe_load_all(s): if not isinstance(d, dict): raise ParseError( - u'each entry must be a dictionary; found {}'.format( + 'each entry must be a dictionary; found {}'.format( type(d).__name__ ) ) # Convert all keys to strings. They started out as strings, # but the user may have inadvertently messed this up. - out.append({six.text_type(k): v for k, v in d.items()}) + out.append({str(k): v for k, v in d.items()}) except yaml.YAMLError as e: - raise ParseError(u'invalid YAML: {}'.format(e)) + raise ParseError(f'invalid YAML: {e}') return out @@ -143,13 +141,13 @@ def apply_(obj, data): else: # Either the field was stringified originally or the user changed # it from a safe type to an unsafe one. Parse it as a string. - obj.set_parse(key, six.text_type(value)) + obj.set_parse(key, str(value)) class EditPlugin(plugins.BeetsPlugin): def __init__(self): - super(EditPlugin, self).__init__() + super().__init__() self.config.add({ # The default fields to edit. 
@@ -166,18 +164,18 @@ class EditPlugin(plugins.BeetsPlugin): def commands(self): edit_command = ui.Subcommand( 'edit', - help=u'interactively edit metadata' + help='interactively edit metadata' ) edit_command.parser.add_option( - u'-f', u'--field', + '-f', '--field', metavar='FIELD', action='append', - help=u'edit this field also', + help='edit this field also', ) edit_command.parser.add_option( - u'--all', + '--all', action='store_true', dest='all', - help=u'edit all fields', + help='edit all fields', ) edit_command.parser.add_album_option() edit_command.func = self._edit_command @@ -191,7 +189,7 @@ class EditPlugin(plugins.BeetsPlugin): items, albums = _do_query(lib, query, opts.album, False) objs = albums if opts.album else items if not objs: - ui.print_(u'Nothing to edit.') + ui.print_('Nothing to edit.') return # Get the fields to edit. @@ -244,15 +242,10 @@ class EditPlugin(plugins.BeetsPlugin): old_data = [flatten(o, fields) for o in objs] # Set up a temporary file with the initial data for editing. - if six.PY2: - new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) - else: - new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, - encoding='utf-8') + new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, + encoding='utf-8') old_str = dump(old_data) new.write(old_str) - if six.PY2: - old_str = old_str.decode('utf-8') new.close() # Loop until we have parseable data and the user confirms. @@ -266,15 +259,15 @@ class EditPlugin(plugins.BeetsPlugin): with codecs.open(new.name, encoding='utf-8') as f: new_str = f.read() if new_str == old_str: - ui.print_(u"No changes; aborting.") + ui.print_("No changes; aborting.") return False # Parse the updated data. try: new_data = load(new_str) except ParseError as e: - ui.print_(u"Could not read data: {}".format(e)) - if ui.input_yn(u"Edit again to fix? (Y/n)", True): + ui.print_(f"Could not read data: {e}") + if ui.input_yn("Edit again to fix? 
(Y/n)", True): continue else: return False @@ -289,18 +282,18 @@ class EditPlugin(plugins.BeetsPlugin): for obj, obj_old in zip(objs, objs_old): changed |= ui.show_model_changes(obj, obj_old) if not changed: - ui.print_(u'No changes to apply.') + ui.print_('No changes to apply.') return False # Confirm the changes. choice = ui.input_options( - (u'continue Editing', u'apply', u'cancel') + ('continue Editing', 'apply', 'cancel') ) - if choice == u'a': # Apply. + if choice == 'a': # Apply. return True - elif choice == u'c': # Cancel. + elif choice == 'c': # Cancel. return False - elif choice == u'e': # Keep editing. + elif choice == 'e': # Keep editing. # Reset the temporary changes to the objects. I we have a # copy from above, use that, else reload from the database. objs = [(old_obj or obj) @@ -322,7 +315,7 @@ class EditPlugin(plugins.BeetsPlugin): are temporary. """ if len(old_data) != len(new_data): - self._log.warning(u'number of objects changed from {} to {}', + self._log.warning('number of objects changed from {} to {}', len(old_data), len(new_data)) obj_by_id = {o.id: o for o in objs} @@ -333,7 +326,7 @@ class EditPlugin(plugins.BeetsPlugin): forbidden = False for key in ignore_fields: if old_dict.get(key) != new_dict.get(key): - self._log.warning(u'ignoring object whose {} changed', key) + self._log.warning('ignoring object whose {} changed', key) forbidden = True break if forbidden: @@ -348,7 +341,7 @@ class EditPlugin(plugins.BeetsPlugin): # Save to the database and possibly write tags. for ob in objs: if ob._dirty: - self._log.debug(u'saving changes to {}', ob) + self._log.debug('saving changes to {}', ob) ob.try_sync(ui.should_write(), ui.should_move()) # Methods for interactive importer execution. 
diff --git a/libs/common/beetsplug/embedart.py b/libs/common/beetsplug/embedart.py index afe8f86f..6db46f8c 100644 --- a/libs/common/beetsplug/embedart.py +++ b/libs/common/beetsplug/embedart.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,7 +13,6 @@ # included in all copies or substantial portions of the Software. """Allows beets to embed album art into file metadata.""" -from __future__ import division, absolute_import, print_function import os.path @@ -34,11 +32,11 @@ def _confirm(objs, album): `album` is a Boolean indicating whether these are albums (as opposed to items). """ - noun = u'album' if album else u'file' - prompt = u'Modify artwork for {} {}{} (Y/n)?'.format( + noun = 'album' if album else 'file' + prompt = 'Modify artwork for {} {}{} (Y/n)?'.format( len(objs), noun, - u's' if len(objs) > 1 else u'' + 's' if len(objs) > 1 else '' ) # Show all the items or albums. @@ -53,39 +51,41 @@ class EmbedCoverArtPlugin(BeetsPlugin): """Allows albumart to be embedded into the actual files. 
""" def __init__(self): - super(EmbedCoverArtPlugin, self).__init__() + super().__init__() self.config.add({ 'maxwidth': 0, 'auto': True, 'compare_threshold': 0, 'ifempty': False, - 'remove_art_file': False + 'remove_art_file': False, + 'quality': 0, }) if self.config['maxwidth'].get(int) and not ArtResizer.shared.local: self.config['maxwidth'] = 0 - self._log.warning(u"ImageMagick or PIL not found; " - u"'maxwidth' option ignored") + self._log.warning("ImageMagick or PIL not found; " + "'maxwidth' option ignored") if self.config['compare_threshold'].get(int) and not \ ArtResizer.shared.can_compare: self.config['compare_threshold'] = 0 - self._log.warning(u"ImageMagick 6.8.7 or higher not installed; " - u"'compare_threshold' option ignored") + self._log.warning("ImageMagick 6.8.7 or higher not installed; " + "'compare_threshold' option ignored") self.register_listener('art_set', self.process_album) def commands(self): # Embed command. embed_cmd = ui.Subcommand( - 'embedart', help=u'embed image files into file metadata' + 'embedart', help='embed image files into file metadata' ) embed_cmd.parser.add_option( - u'-f', u'--file', metavar='PATH', help=u'the image file to embed' + '-f', '--file', metavar='PATH', help='the image file to embed' ) embed_cmd.parser.add_option( - u"-y", u"--yes", action="store_true", help=u"skip confirmation" + "-y", "--yes", action="store_true", help="skip confirmation" ) maxwidth = self.config['maxwidth'].get(int) + quality = self.config['quality'].get(int) compare_threshold = self.config['compare_threshold'].get(int) ifempty = self.config['ifempty'].get(bool) @@ -93,7 +93,7 @@ class EmbedCoverArtPlugin(BeetsPlugin): if opts.file: imagepath = normpath(opts.file) if not os.path.isfile(syspath(imagepath)): - raise ui.UserError(u'image file {0} not found'.format( + raise ui.UserError('image file {} not found'.format( displayable_path(imagepath) )) @@ -104,8 +104,9 @@ class EmbedCoverArtPlugin(BeetsPlugin): return for item in items: - 
art.embed_item(self._log, item, imagepath, maxwidth, None, - compare_threshold, ifempty) + art.embed_item(self._log, item, imagepath, maxwidth, + None, compare_threshold, ifempty, + quality=quality) else: albums = lib.albums(decargs(args)) @@ -114,8 +115,9 @@ class EmbedCoverArtPlugin(BeetsPlugin): return for album in albums: - art.embed_album(self._log, album, maxwidth, False, - compare_threshold, ifempty) + art.embed_album(self._log, album, maxwidth, + False, compare_threshold, ifempty, + quality=quality) self.remove_artfile(album) embed_cmd.func = embed_func @@ -123,15 +125,15 @@ class EmbedCoverArtPlugin(BeetsPlugin): # Extract command. extract_cmd = ui.Subcommand( 'extractart', - help=u'extract an image from file metadata', + help='extract an image from file metadata', ) extract_cmd.parser.add_option( - u'-o', dest='outpath', - help=u'image output file', + '-o', dest='outpath', + help='image output file', ) extract_cmd.parser.add_option( - u'-n', dest='filename', - help=u'image filename to create for all matched albums', + '-n', dest='filename', + help='image filename to create for all matched albums', ) extract_cmd.parser.add_option( '-a', dest='associate', action='store_true', @@ -147,7 +149,7 @@ class EmbedCoverArtPlugin(BeetsPlugin): config['art_filename'].get()) if os.path.dirname(filename) != b'': self._log.error( - u"Only specify a name rather than a path for -n") + "Only specify a name rather than a path for -n") return for album in lib.albums(decargs(args)): artpath = normpath(os.path.join(album.path, filename)) @@ -161,10 +163,10 @@ class EmbedCoverArtPlugin(BeetsPlugin): # Clear command. 
clear_cmd = ui.Subcommand( 'clearart', - help=u'remove images from file metadata', + help='remove images from file metadata', ) clear_cmd.parser.add_option( - u"-y", u"--yes", action="store_true", help=u"skip confirmation" + "-y", "--yes", action="store_true", help="skip confirmation" ) def clear_func(lib, opts, args): @@ -189,11 +191,11 @@ class EmbedCoverArtPlugin(BeetsPlugin): def remove_artfile(self, album): """Possibly delete the album art file for an album (if the - appropriate configuration option is enabled. + appropriate configuration option is enabled). """ if self.config['remove_art_file'] and album.artpath: if os.path.isfile(album.artpath): - self._log.debug(u'Removing album art file for {0}', album) + self._log.debug('Removing album art file for {0}', album) os.remove(album.artpath) album.artpath = None album.store() diff --git a/libs/common/beetsplug/embyupdate.py b/libs/common/beetsplug/embyupdate.py index 5c731954..c17fabad 100644 --- a/libs/common/beetsplug/embyupdate.py +++ b/libs/common/beetsplug/embyupdate.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Updates the Emby Library whenever the beets library is changed. emby: @@ -9,14 +7,11 @@ apikey: apikey password: password """ -from __future__ import division, absolute_import, print_function import hashlib import requests -from six.moves.urllib.parse import urlencode -from six.moves.urllib.parse import urljoin, parse_qs, urlsplit, urlunsplit - +from urllib.parse import urlencode, urljoin, parse_qs, urlsplit, urlunsplit from beets import config from beets.plugins import BeetsPlugin @@ -146,14 +141,14 @@ def get_user(host, port, username): class EmbyUpdate(BeetsPlugin): def __init__(self): - super(EmbyUpdate, self).__init__() + super().__init__() # Adding defaults. 
config['emby'].add({ - u'host': u'http://localhost', - u'port': 8096, - u'apikey': None, - u'password': None, + 'host': 'http://localhost', + 'port': 8096, + 'apikey': None, + 'password': None, }) self.register_listener('database_change', self.listen_for_db_change) @@ -166,7 +161,7 @@ class EmbyUpdate(BeetsPlugin): def update(self, lib): """When the client exists try to send refresh request to Emby. """ - self._log.info(u'Updating Emby library...') + self._log.info('Updating Emby library...') host = config['emby']['host'].get() port = config['emby']['port'].get() @@ -176,13 +171,13 @@ class EmbyUpdate(BeetsPlugin): # Check if at least a apikey or password is given. if not any([password, token]): - self._log.warning(u'Provide at least Emby password or apikey.') + self._log.warning('Provide at least Emby password or apikey.') return # Get user information from the Emby API. user = get_user(host, port, username) if not user: - self._log.warning(u'User {0} could not be found.'.format(username)) + self._log.warning(f'User {username} could not be found.') return if not token: @@ -194,7 +189,7 @@ class EmbyUpdate(BeetsPlugin): token = get_token(host, port, headers, auth_data) if not token: self._log.warning( - u'Could not get token for user {0}', username + 'Could not get token for user {0}', username ) return @@ -205,6 +200,6 @@ class EmbyUpdate(BeetsPlugin): url = api_url(host, port, '/Library/Refresh') r = requests.post(url, headers=headers) if r.status_code != 204: - self._log.warning(u'Update could not be triggered') + self._log.warning('Update could not be triggered') else: - self._log.info(u'Update triggered.') + self._log.info('Update triggered.') diff --git a/libs/common/beetsplug/export.py b/libs/common/beetsplug/export.py index 641b9fef..99f6d706 100644 --- a/libs/common/beetsplug/export.py +++ b/libs/common/beetsplug/export.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. 
# # Permission is hereby granted, free of charge, to any person obtaining @@ -15,23 +14,25 @@ """Exports data from beets """ -from __future__ import division, absolute_import, print_function import sys -import json import codecs +import json +import csv +from xml.etree import ElementTree from datetime import datetime, date from beets.plugins import BeetsPlugin from beets import ui -from beets import mediafile -from beetsplug.info import make_key_filter, library_data, tag_data +from beets import util +import mediafile +from beetsplug.info import library_data, tag_data class ExportEncoder(json.JSONEncoder): """Deals with dates because JSON doesn't have a standard""" def default(self, o): - if isinstance(o, datetime) or isinstance(o, date): + if isinstance(o, (datetime, date)): return o.isoformat() return json.JSONEncoder.default(self, o) @@ -39,12 +40,12 @@ class ExportEncoder(json.JSONEncoder): class ExportPlugin(BeetsPlugin): def __init__(self): - super(ExportPlugin, self).__init__() + super().__init__() self.config.add({ 'default_format': 'json', 'json': { - # json module formatting options + # JSON module formatting options. 'formatting': { 'ensure_ascii': False, 'indent': 4, @@ -52,100 +53,175 @@ class ExportPlugin(BeetsPlugin): 'sort_keys': True } }, + 'jsonlines': { + # JSON Lines formatting options. + 'formatting': { + 'ensure_ascii': False, + 'separators': (',', ': '), + 'sort_keys': True + } + }, + 'csv': { + # CSV module formatting options. + 'formatting': { + # The delimiter used to seperate columns. + 'delimiter': ',', + # The dialect to use when formating the file output. + 'dialect': 'excel' + } + }, + 'xml': { + # XML module formatting options. 
+ 'formatting': {} + } # TODO: Use something like the edit plugin # 'item_fields': [] }) def commands(self): - # TODO: Add option to use albums - - cmd = ui.Subcommand('export', help=u'export data from beets') + cmd = ui.Subcommand('export', help='export data from beets') cmd.func = self.run cmd.parser.add_option( - u'-l', u'--library', action='store_true', - help=u'show library fields instead of tags', + '-l', '--library', action='store_true', + help='show library fields instead of tags', ) cmd.parser.add_option( - u'--append', action='store_true', default=False, - help=u'if should append data to the file', + '-a', '--album', action='store_true', + help='show album fields instead of tracks (implies "--library")', ) cmd.parser.add_option( - u'-i', u'--include-keys', default=[], + '--append', action='store_true', default=False, + help='if should append data to the file', + ) + cmd.parser.add_option( + '-i', '--include-keys', default=[], action='append', dest='included_keys', - help=u'comma separated list of keys to show', + help='comma separated list of keys to show', ) cmd.parser.add_option( - u'-o', u'--output', - help=u'path for the output file. If not given, will print the data' + '-o', '--output', + help='path for the output file. 
If not given, will print the data' + ) + cmd.parser.add_option( + '-f', '--format', default='json', + help="the output format: json (default), jsonlines, csv, or xml" ) return [cmd] def run(self, lib, opts, args): - file_path = opts.output - file_format = self.config['default_format'].get(str) file_mode = 'a' if opts.append else 'w' + file_format = opts.format or self.config['default_format'].get(str) + file_format_is_line_based = (file_format == 'jsonlines') format_options = self.config[file_format]['formatting'].get(dict) export_format = ExportFormat.factory( - file_format, **{ + file_type=file_format, + **{ 'file_path': file_path, 'file_mode': file_mode } ) - items = [] - data_collector = library_data if opts.library else tag_data + if opts.library or opts.album: + data_collector = library_data + else: + data_collector = tag_data included_keys = [] for keys in opts.included_keys: included_keys.extend(keys.split(',')) - key_filter = make_key_filter(included_keys) - for data_emitter in data_collector(lib, ui.decargs(args)): + items = [] + for data_emitter in data_collector( + lib, ui.decargs(args), + album=opts.album, + ): try: - data, item = data_emitter() - except (mediafile.UnreadableFileError, IOError) as ex: - self._log.error(u'cannot read file: {0}', ex) + data, item = data_emitter(included_keys or '*') + except (mediafile.UnreadableFileError, OSError) as ex: + self._log.error('cannot read file: {0}', ex) continue - data = key_filter(data) - items += [data] + for key, value in data.items(): + if isinstance(value, bytes): + data[key] = util.displayable_path(value) - export_format.export(items, **format_options) - - -class ExportFormat(object): - """The output format type""" - - @classmethod - def factory(cls, type, **kwargs): - if type == "json": - if kwargs['file_path']: - return JsonFileFormat(**kwargs) + if file_format_is_line_based: + export_format.export(data, **format_options) else: - return JsonPrintFormat() - raise NotImplementedError() + items += 
[data] - def export(self, data, **kwargs): - raise NotImplementedError() + if not file_format_is_line_based: + export_format.export(items, **format_options) -class JsonPrintFormat(ExportFormat): - """Outputs to the console""" - - def export(self, data, **kwargs): - json.dump(data, sys.stdout, cls=ExportEncoder, **kwargs) - - -class JsonFileFormat(ExportFormat): - """Saves in a json file""" - - def __init__(self, file_path, file_mode=u'w', encoding=u'utf-8'): +class ExportFormat: + """The output format type""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): self.path = file_path self.mode = file_mode self.encoding = encoding + # creates a file object to write/append or sets to stdout + self.out_stream = codecs.open(self.path, self.mode, self.encoding) \ + if self.path else sys.stdout + + @classmethod + def factory(cls, file_type, **kwargs): + if file_type in ["json", "jsonlines"]: + return JsonFormat(**kwargs) + elif file_type == "csv": + return CSVFormat(**kwargs) + elif file_type == "xml": + return XMLFormat(**kwargs) + else: + raise NotImplementedError() def export(self, data, **kwargs): - with codecs.open(self.path, self.mode, self.encoding) as f: - json.dump(data, f, cls=ExportEncoder, **kwargs) + raise NotImplementedError() + + +class JsonFormat(ExportFormat): + """Saves in a json file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + json.dump(data, self.out_stream, cls=ExportEncoder, **kwargs) + self.out_stream.write('\n') + + +class CSVFormat(ExportFormat): + """Saves in a csv file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + header = list(data[0].keys()) if data else [] + writer = csv.DictWriter(self.out_stream, fieldnames=header, **kwargs) + writer.writeheader() + writer.writerows(data) + + +class 
XMLFormat(ExportFormat): + """Saves in a xml file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + # Creates the XML file structure. + library = ElementTree.Element('library') + tracks = ElementTree.SubElement(library, 'tracks') + if data and isinstance(data[0], dict): + for index, item in enumerate(data): + track = ElementTree.SubElement(tracks, 'track') + for key, value in item.items(): + track_details = ElementTree.SubElement(track, key) + track_details.text = value + # Depending on the version of python the encoding needs to change + try: + data = ElementTree.tostring(library, encoding='unicode', **kwargs) + except LookupError: + data = ElementTree.tostring(library, encoding='utf-8', **kwargs) + + self.out_stream.write(data) diff --git a/libs/common/beetsplug/fetchart.py b/libs/common/beetsplug/fetchart.py index 0e106694..f2c1e5a7 100644 --- a/libs/common/beetsplug/fetchart.py +++ b/libs/common/beetsplug/fetchart.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,12 +14,12 @@ """Fetches album art. 
""" -from __future__ import division, absolute_import, print_function from contextlib import closing import os import re from tempfile import NamedTemporaryFile +from collections import OrderedDict import requests @@ -29,17 +28,11 @@ from beets import importer from beets import ui from beets import util from beets import config -from beets.mediafile import image_mime_type +from mediafile import image_mime_type from beets.util.artresizer import ArtResizer -from beets.util import confit +from beets.util import sorted_walk from beets.util import syspath, bytestring_path, py3_path -import six - -try: - import itunes - HAVE_ITUNES = True -except ImportError: - HAVE_ITUNES = False +import confuse CONTENT_TYPES = { 'image/jpeg': [b'jpg', b'jpeg'], @@ -48,18 +41,21 @@ CONTENT_TYPES = { IMAGE_EXTENSIONS = [ext for exts in CONTENT_TYPES.values() for ext in exts] -class Candidate(object): +class Candidate: """Holds information about a matching artwork, deals with validation of dimension restrictions and resizing. """ CANDIDATE_BAD = 0 CANDIDATE_EXACT = 1 CANDIDATE_DOWNSCALE = 2 + CANDIDATE_DOWNSIZE = 3 + CANDIDATE_DEINTERLACE = 4 + CANDIDATE_REFORMAT = 5 MATCH_EXACT = 0 MATCH_FALLBACK = 1 - def __init__(self, log, path=None, url=None, source=u'', + def __init__(self, log, path=None, url=None, source='', match=None, size=None): self._log = log self.path = path @@ -75,32 +71,39 @@ class Candidate(object): Return `CANDIDATE_BAD` if the file is unusable. Return `CANDIDATE_EXACT` if the file is usable as-is. - Return `CANDIDATE_DOWNSCALE` if the file must be resized. + Return `CANDIDATE_DOWNSCALE` if the file must be rescaled. + Return `CANDIDATE_DOWNSIZE` if the file must be resized, and possibly + also rescaled. + Return `CANDIDATE_DEINTERLACE` if the file must be deinterlaced. + Return `CANDIDATE_REFORMAT` if the file has to be converted. 
""" if not self.path: return self.CANDIDATE_BAD - if not (plugin.enforce_ratio or plugin.minwidth or plugin.maxwidth): + if (not (plugin.enforce_ratio or plugin.minwidth or plugin.maxwidth + or plugin.max_filesize or plugin.deinterlace + or plugin.cover_format)): return self.CANDIDATE_EXACT # get_size returns None if no local imaging backend is available if not self.size: self.size = ArtResizer.shared.get_size(self.path) - self._log.debug(u'image size: {}', self.size) + self._log.debug('image size: {}', self.size) if not self.size: - self._log.warning(u'Could not get size of image (please see ' - u'documentation for dependencies). ' - u'The configuration options `minwidth` and ' - u'`enforce_ratio` may be violated.') + self._log.warning('Could not get size of image (please see ' + 'documentation for dependencies). ' + 'The configuration options `minwidth`, ' + '`enforce_ratio` and `max_filesize` ' + 'may be violated.') return self.CANDIDATE_EXACT short_edge = min(self.size) long_edge = max(self.size) - # Check minimum size. + # Check minimum dimension. 
if plugin.minwidth and self.size[0] < plugin.minwidth: - self._log.debug(u'image too small ({} < {})', + self._log.debug('image too small ({} < {})', self.size[0], plugin.minwidth) return self.CANDIDATE_BAD @@ -109,38 +112,83 @@ class Candidate(object): if plugin.enforce_ratio: if plugin.margin_px: if edge_diff > plugin.margin_px: - self._log.debug(u'image is not close enough to being ' - u'square, ({} - {} > {})', + self._log.debug('image is not close enough to being ' + 'square, ({} - {} > {})', long_edge, short_edge, plugin.margin_px) return self.CANDIDATE_BAD elif plugin.margin_percent: margin_px = plugin.margin_percent * long_edge if edge_diff > margin_px: - self._log.debug(u'image is not close enough to being ' - u'square, ({} - {} > {})', + self._log.debug('image is not close enough to being ' + 'square, ({} - {} > {})', long_edge, short_edge, margin_px) return self.CANDIDATE_BAD elif edge_diff: # also reached for margin_px == 0 and margin_percent == 0.0 - self._log.debug(u'image is not square ({} != {})', + self._log.debug('image is not square ({} != {})', self.size[0], self.size[1]) return self.CANDIDATE_BAD - # Check maximum size. + # Check maximum dimension. + downscale = False if plugin.maxwidth and self.size[0] > plugin.maxwidth: - self._log.debug(u'image needs resizing ({} > {})', + self._log.debug('image needs rescaling ({} > {})', self.size[0], plugin.maxwidth) - return self.CANDIDATE_DOWNSCALE + downscale = True - return self.CANDIDATE_EXACT + # Check filesize. 
+ downsize = False + if plugin.max_filesize: + filesize = os.stat(syspath(self.path)).st_size + if filesize > plugin.max_filesize: + self._log.debug('image needs resizing ({}B > {}B)', + filesize, plugin.max_filesize) + downsize = True + + # Check image format + reformat = False + if plugin.cover_format: + fmt = ArtResizer.shared.get_format(self.path) + reformat = fmt != plugin.cover_format + if reformat: + self._log.debug('image needs reformatting: {} -> {}', + fmt, plugin.cover_format) + + if downscale: + return self.CANDIDATE_DOWNSCALE + elif downsize: + return self.CANDIDATE_DOWNSIZE + elif plugin.deinterlace: + return self.CANDIDATE_DEINTERLACE + elif reformat: + return self.CANDIDATE_REFORMAT + else: + return self.CANDIDATE_EXACT def validate(self, plugin): self.check = self._validate(plugin) return self.check def resize(self, plugin): - if plugin.maxwidth and self.check == self.CANDIDATE_DOWNSCALE: - self.path = ArtResizer.shared.resize(plugin.maxwidth, self.path) + if self.check == self.CANDIDATE_DOWNSCALE: + self.path = \ + ArtResizer.shared.resize(plugin.maxwidth, self.path, + quality=plugin.quality, + max_filesize=plugin.max_filesize) + elif self.check == self.CANDIDATE_DOWNSIZE: + # dimensions are correct, so maxwidth is set to maximum dimension + self.path = \ + ArtResizer.shared.resize(max(self.size), self.path, + quality=plugin.quality, + max_filesize=plugin.max_filesize) + elif self.check == self.CANDIDATE_DEINTERLACE: + self.path = ArtResizer.shared.deinterlace(self.path) + elif self.check == self.CANDIDATE_REFORMAT: + self.path = ArtResizer.shared.reformat( + self.path, + plugin.cover_format, + deinterlaced=plugin.deinterlace, + ) def _logged_get(log, *args, **kwargs): @@ -169,14 +217,19 @@ def _logged_get(log, *args, **kwargs): message = 'getting URL' req = requests.Request('GET', *args, **req_kwargs) + with requests.Session() as s: s.headers = {'User-Agent': 'beets'} prepped = s.prepare_request(req) + settings = s.merge_environment_settings( + 
prepped.url, {}, None, None, None + ) + send_kwargs.update(settings) log.debug('{}: {}', message, prepped.url) return s.send(prepped, **send_kwargs) -class RequestMixin(object): +class RequestMixin: """Adds a Requests wrapper to the class that uses the logger, which must be named `self._log`. """ @@ -208,10 +261,13 @@ class ArtSource(RequestMixin): def fetch_image(self, candidate, plugin): raise NotImplementedError() + def cleanup(self, candidate): + pass + class LocalArtSource(ArtSource): IS_LOCAL = True - LOC_STR = u'local' + LOC_STR = 'local' def fetch_image(self, candidate, plugin): pass @@ -219,7 +275,7 @@ class LocalArtSource(ArtSource): class RemoteArtSource(ArtSource): IS_LOCAL = False - LOC_STR = u'remote' + LOC_STR = 'remote' def fetch_image(self, candidate, plugin): """Downloads an image from a URL and checks whether it seems to @@ -231,7 +287,7 @@ class RemoteArtSource(ArtSource): candidate.url) try: with closing(self.request(candidate.url, stream=True, - message=u'downloading image')) as resp: + message='downloading image')) as resp: ct = resp.headers.get('Content-Type', None) # Download the image to a temporary file. As some servers @@ -259,16 +315,16 @@ class RemoteArtSource(ArtSource): real_ct = ct if real_ct not in CONTENT_TYPES: - self._log.debug(u'not a supported image: {}', - real_ct or u'unknown content type') + self._log.debug('not a supported image: {}', + real_ct or 'unknown content type') return ext = b'.' + CONTENT_TYPES[real_ct][0] if real_ct != ct: - self._log.warning(u'Server specified {}, but returned a ' - u'{} image. Correcting the extension ' - u'to {}', + self._log.warning('Server specified {}, but returned a ' + '{} image. 
Correcting the extension ' + 'to {}', ct, real_ct, ext) suffix = py3_path(ext) @@ -278,45 +334,88 @@ class RemoteArtSource(ArtSource): # download the remaining part of the image for chunk in data: fh.write(chunk) - self._log.debug(u'downloaded art to: {0}', + self._log.debug('downloaded art to: {0}', util.displayable_path(fh.name)) candidate.path = util.bytestring_path(fh.name) return - except (IOError, requests.RequestException, TypeError) as exc: + except (OSError, requests.RequestException, TypeError) as exc: # Handling TypeError works around a urllib3 bug: # https://github.com/shazow/urllib3/issues/556 - self._log.debug(u'error fetching art: {}', exc) + self._log.debug('error fetching art: {}', exc) return + def cleanup(self, candidate): + if candidate.path: + try: + util.remove(path=candidate.path) + except util.FilesystemError as exc: + self._log.debug('error cleaning up tmp art: {}', exc) + class CoverArtArchive(RemoteArtSource): - NAME = u"Cover Art Archive" + NAME = "Cover Art Archive" VALID_MATCHING_CRITERIA = ['release', 'releasegroup'] + VALID_THUMBNAIL_SIZES = [250, 500, 1200] - if util.SNI_SUPPORTED: - URL = 'https://coverartarchive.org/release/{mbid}/front' - GROUP_URL = 'https://coverartarchive.org/release-group/{mbid}/front' - else: - URL = 'http://coverartarchive.org/release/{mbid}/front' - GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front' + URL = 'https://coverartarchive.org/release/{mbid}' + GROUP_URL = 'https://coverartarchive.org/release-group/{mbid}' def get(self, album, plugin, paths): """Return the Cover Art Archive and Cover Art Archive release group URLs using album MusicBrainz release ID and release group ID. 
""" + + def get_image_urls(url, size_suffix=None): + try: + response = self.request(url) + except requests.RequestException: + self._log.debug('{}: error receiving response' + .format(self.NAME)) + return + + try: + data = response.json() + except ValueError: + self._log.debug('{}: error loading response: {}' + .format(self.NAME, response.text)) + return + + for item in data.get('images', []): + try: + if 'Front' not in item['types']: + continue + + if size_suffix: + yield item['thumbnails'][size_suffix] + else: + yield item['image'] + except KeyError: + pass + + release_url = self.URL.format(mbid=album.mb_albumid) + release_group_url = self.GROUP_URL.format(mbid=album.mb_releasegroupid) + + # Cover Art Archive API offers pre-resized thumbnails at several sizes. + # If the maxwidth config matches one of the already available sizes + # fetch it directly intead of fetching the full sized image and + # resizing it. + size_suffix = None + if plugin.maxwidth in self.VALID_THUMBNAIL_SIZES: + size_suffix = "-" + str(plugin.maxwidth) + if 'release' in self.match_by and album.mb_albumid: - yield self._candidate(url=self.URL.format(mbid=album.mb_albumid), - match=Candidate.MATCH_EXACT) + for url in get_image_urls(release_url, size_suffix): + yield self._candidate(url=url, match=Candidate.MATCH_EXACT) + if 'releasegroup' in self.match_by and album.mb_releasegroupid: - yield self._candidate( - url=self.GROUP_URL.format(mbid=album.mb_releasegroupid), - match=Candidate.MATCH_FALLBACK) + for url in get_image_urls(release_group_url): + yield self._candidate(url=url, match=Candidate.MATCH_FALLBACK) class Amazon(RemoteArtSource): - NAME = u"Amazon" - URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' + NAME = "Amazon" + URL = 'https://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' INDICES = (1, 2) def get(self, album, plugin, paths): @@ -329,8 +428,8 @@ class Amazon(RemoteArtSource): class AlbumArtOrg(RemoteArtSource): - NAME = u"AlbumArt.org scraper" - URL = 
'http://www.albumart.org/index_detail.php' + NAME = "AlbumArt.org scraper" + URL = 'https://www.albumart.org/index_detail.php' PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' def get(self, album, plugin, paths): @@ -341,9 +440,9 @@ class AlbumArtOrg(RemoteArtSource): # Get the page from albumart.org. try: resp = self.request(self.URL, params={'asin': album.asin}) - self._log.debug(u'scraped art URL: {0}', resp.url) + self._log.debug('scraped art URL: {0}', resp.url) except requests.RequestException: - self._log.debug(u'error scraping art page') + self._log.debug('error scraping art page') return # Search the page for the image URL. @@ -352,15 +451,15 @@ class AlbumArtOrg(RemoteArtSource): image_url = m.group(1) yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) else: - self._log.debug(u'no image found on page') + self._log.debug('no image found on page') class GoogleImages(RemoteArtSource): - NAME = u"Google Images" - URL = u'https://www.googleapis.com/customsearch/v1' + NAME = "Google Images" + URL = 'https://www.googleapis.com/customsearch/v1' def __init__(self, *args, **kwargs): - super(GoogleImages, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.key = self._config['google_key'].get(), self.cx = self._config['google_engine'].get(), @@ -371,24 +470,29 @@ class GoogleImages(RemoteArtSource): if not (album.albumartist and album.album): return search_string = (album.albumartist + ',' + album.album).encode('utf-8') - response = self.request(self.URL, params={ - 'key': self.key, - 'cx': self.cx, - 'q': search_string, - 'searchType': 'image' - }) + + try: + response = self.request(self.URL, params={ + 'key': self.key, + 'cx': self.cx, + 'q': search_string, + 'searchType': 'image' + }) + except requests.RequestException: + self._log.debug('google: error receiving response') + return # Get results using JSON. 
try: data = response.json() except ValueError: - self._log.debug(u'google: error loading response: {}' + self._log.debug('google: error loading response: {}' .format(response.text)) return if 'error' in data: reason = data['error']['errors'][0]['reason'] - self._log.debug(u'google fetchart error: {0}', reason) + self._log.debug('google fetchart error: {0}', reason) return if 'items' in data.keys(): @@ -399,103 +503,142 @@ class GoogleImages(RemoteArtSource): class FanartTV(RemoteArtSource): """Art from fanart.tv requested using their API""" - NAME = u"fanart.tv" + NAME = "fanart.tv" API_URL = 'https://webservice.fanart.tv/v3/' API_ALBUMS = API_URL + 'music/albums/' PROJECT_KEY = '61a7d0ab4e67162b7a0c7c35915cd48e' def __init__(self, *args, **kwargs): - super(FanartTV, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.client_key = self._config['fanarttv_key'].get() def get(self, album, plugin, paths): if not album.mb_releasegroupid: return - response = self.request( - self.API_ALBUMS + album.mb_releasegroupid, - headers={'api-key': self.PROJECT_KEY, - 'client-key': self.client_key}) + try: + response = self.request( + self.API_ALBUMS + album.mb_releasegroupid, + headers={'api-key': self.PROJECT_KEY, + 'client-key': self.client_key}) + except requests.RequestException: + self._log.debug('fanart.tv: error receiving response') + return try: data = response.json() except ValueError: - self._log.debug(u'fanart.tv: error loading response: {}', + self._log.debug('fanart.tv: error loading response: {}', response.text) return - if u'status' in data and data[u'status'] == u'error': - if u'not found' in data[u'error message'].lower(): - self._log.debug(u'fanart.tv: no image found') - elif u'api key' in data[u'error message'].lower(): - self._log.warning(u'fanart.tv: Invalid API key given, please ' - u'enter a valid one in your config file.') + if 'status' in data and data['status'] == 'error': + if 'not found' in data['error message'].lower(): + 
self._log.debug('fanart.tv: no image found') + elif 'api key' in data['error message'].lower(): + self._log.warning('fanart.tv: Invalid API key given, please ' + 'enter a valid one in your config file.') else: - self._log.debug(u'fanart.tv: error on request: {}', - data[u'error message']) + self._log.debug('fanart.tv: error on request: {}', + data['error message']) return matches = [] # can there be more than one releasegroupid per response? - for mbid, art in data.get(u'albums', dict()).items(): + for mbid, art in data.get('albums', {}).items(): # there might be more art referenced, e.g. cdart, and an albumcover - # might not be present, even if the request was succesful - if album.mb_releasegroupid == mbid and u'albumcover' in art: - matches.extend(art[u'albumcover']) + # might not be present, even if the request was successful + if album.mb_releasegroupid == mbid and 'albumcover' in art: + matches.extend(art['albumcover']) # can this actually occur? else: - self._log.debug(u'fanart.tv: unexpected mb_releasegroupid in ' - u'response!') + self._log.debug('fanart.tv: unexpected mb_releasegroupid in ' + 'response!') - matches.sort(key=lambda x: x[u'likes'], reverse=True) + matches.sort(key=lambda x: x['likes'], reverse=True) for item in matches: # fanart.tv has a strict size requirement for album art to be # uploaded - yield self._candidate(url=item[u'url'], + yield self._candidate(url=item['url'], match=Candidate.MATCH_EXACT, size=(1000, 1000)) class ITunesStore(RemoteArtSource): - NAME = u"iTunes Store" + NAME = "iTunes Store" + API_URL = 'https://itunes.apple.com/search' def get(self, album, plugin, paths): """Return art URL from iTunes Store given an album title. 
""" if not (album.albumartist and album.album): return - search_string = (album.albumartist + ' ' + album.album).encode('utf-8') + + payload = { + 'term': album.albumartist + ' ' + album.album, + 'entity': 'album', + 'media': 'music', + 'limit': 200 + } try: - # Isolate bugs in the iTunes library while searching. + r = self.request(self.API_URL, params=payload) + r.raise_for_status() + except requests.RequestException as e: + self._log.debug('iTunes search failed: {0}', e) + return + + try: + candidates = r.json()['results'] + except ValueError as e: + self._log.debug('Could not decode json response: {0}', e) + return + except KeyError as e: + self._log.debug('{} not found in json. Fields are {} ', + e, + list(r.json().keys())) + return + + if not candidates: + self._log.debug('iTunes search for {!r} got no results', + payload['term']) + return + + if self._config['high_resolution']: + image_suffix = '100000x100000-999' + else: + image_suffix = '1200x1200bb' + + for c in candidates: try: - results = itunes.search_album(search_string) - except Exception as exc: - self._log.debug(u'iTunes search failed: {0}', exc) - return + if (c['artistName'] == album.albumartist + and c['collectionName'] == album.album): + art_url = c['artworkUrl100'] + art_url = art_url.replace('100x100bb', + image_suffix) + yield self._candidate(url=art_url, + match=Candidate.MATCH_EXACT) + except KeyError as e: + self._log.debug('Malformed itunes candidate: {} not found in {}', # NOQA E501 + e, + list(c.keys())) - # Get the first match. 
- if results: - itunes_album = results[0] - else: - self._log.debug(u'iTunes search for {:r} got no results', - search_string) - return - - if itunes_album.get_artwork()['100']: - small_url = itunes_album.get_artwork()['100'] - big_url = small_url.replace('100x100', '1200x1200') - yield self._candidate(url=big_url, match=Candidate.MATCH_EXACT) - else: - self._log.debug(u'album has no artwork in iTunes Store') - except IndexError: - self._log.debug(u'album not found in iTunes Store') + try: + fallback_art_url = candidates[0]['artworkUrl100'] + fallback_art_url = fallback_art_url.replace('100x100bb', + image_suffix) + yield self._candidate(url=fallback_art_url, + match=Candidate.MATCH_FALLBACK) + except KeyError as e: + self._log.debug('Malformed itunes candidate: {} not found in {}', + e, + list(c.keys())) class Wikipedia(RemoteArtSource): - NAME = u"Wikipedia (queried through DBpedia)" + NAME = "Wikipedia (queried through DBpedia)" DBPEDIA_URL = 'https://dbpedia.org/sparql' WIKIPEDIA_URL = 'https://en.wikipedia.org/w/api.php' - SPARQL_QUERY = u'''PREFIX rdf: + SPARQL_QUERY = '''PREFIX rdf: PREFIX dbpprop: PREFIX owl: PREFIX rdfs: @@ -523,16 +666,22 @@ class Wikipedia(RemoteArtSource): # Find the name of the cover art filename on DBpedia cover_filename, page_id = None, None - dbpedia_response = self.request( - self.DBPEDIA_URL, - params={ - 'format': 'application/sparql-results+json', - 'timeout': 2500, - 'query': self.SPARQL_QUERY.format( - artist=album.albumartist.title(), album=album.album) - }, - headers={'content-type': 'application/json'}, - ) + + try: + dbpedia_response = self.request( + self.DBPEDIA_URL, + params={ + 'format': 'application/sparql-results+json', + 'timeout': 2500, + 'query': self.SPARQL_QUERY.format( + artist=album.albumartist.title(), album=album.album) + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('dbpedia: error receiving response') + return + try: data = 
dbpedia_response.json() results = data['results']['bindings'] @@ -540,9 +689,9 @@ class Wikipedia(RemoteArtSource): cover_filename = 'File:' + results[0]['coverFilename']['value'] page_id = results[0]['pageId']['value'] else: - self._log.debug(u'wikipedia: album not found on dbpedia') + self._log.debug('wikipedia: album not found on dbpedia') except (ValueError, KeyError, IndexError): - self._log.debug(u'wikipedia: error scraping dbpedia response: {}', + self._log.debug('wikipedia: error scraping dbpedia response: {}', dbpedia_response.text) # Ensure we have a filename before attempting to query wikipedia @@ -557,25 +706,29 @@ class Wikipedia(RemoteArtSource): if ' .' in cover_filename and \ '.' not in cover_filename.split(' .')[-1]: self._log.debug( - u'wikipedia: dbpedia provided incomplete cover_filename' + 'wikipedia: dbpedia provided incomplete cover_filename' ) lpart, rpart = cover_filename.rsplit(' .', 1) # Query all the images in the page - wikipedia_response = self.request( - self.WIKIPEDIA_URL, - params={ - 'format': 'json', - 'action': 'query', - 'continue': '', - 'prop': 'images', - 'pageids': page_id, - }, - headers={'content-type': 'application/json'}, - ) + try: + wikipedia_response = self.request( + self.WIKIPEDIA_URL, + params={ + 'format': 'json', + 'action': 'query', + 'continue': '', + 'prop': 'images', + 'pageids': page_id, + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('wikipedia: error receiving response') + return # Try to see if one of the images on the pages matches our - # imcomplete cover_filename + # incomplete cover_filename try: data = wikipedia_response.json() results = data['query']['pages'][page_id]['images'] @@ -586,23 +739,27 @@ class Wikipedia(RemoteArtSource): break except (ValueError, KeyError): self._log.debug( - u'wikipedia: failed to retrieve a cover_filename' + 'wikipedia: failed to retrieve a cover_filename' ) return # Find the absolute url of the cover art 
on Wikipedia - wikipedia_response = self.request( - self.WIKIPEDIA_URL, - params={ - 'format': 'json', - 'action': 'query', - 'continue': '', - 'prop': 'imageinfo', - 'iiprop': 'url', - 'titles': cover_filename.encode('utf-8'), - }, - headers={'content-type': 'application/json'}, - ) + try: + wikipedia_response = self.request( + self.WIKIPEDIA_URL, + params={ + 'format': 'json', + 'action': 'query', + 'continue': '', + 'prop': 'imageinfo', + 'iiprop': 'url', + 'titles': cover_filename.encode('utf-8'), + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('wikipedia: error receiving response') + return try: data = wikipedia_response.json() @@ -612,12 +769,12 @@ class Wikipedia(RemoteArtSource): yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) except (ValueError, KeyError, IndexError): - self._log.debug(u'wikipedia: error scraping imageinfo') + self._log.debug('wikipedia: error scraping imageinfo') return class FileSystem(LocalArtSource): - NAME = u"Filesystem" + NAME = "Filesystem" @staticmethod def filename_priority(filename, cover_names): @@ -644,12 +801,16 @@ class FileSystem(LocalArtSource): # Find all files that look like images in the directory. images = [] - for fn in os.listdir(syspath(path)): - fn = bytestring_path(fn) - for ext in IMAGE_EXTENSIONS: - if fn.lower().endswith(b'.' + ext) and \ - os.path.isfile(syspath(os.path.join(path, fn))): - images.append(fn) + ignore = config['ignore'].as_str_seq() + ignore_hidden = config['ignore_hidden'].get(bool) + for _, _, files in sorted_walk(path, ignore=ignore, + ignore_hidden=ignore_hidden): + for fn in files: + fn = bytestring_path(fn) + for ext in IMAGE_EXTENSIONS: + if fn.lower().endswith(b'.' + ext) and \ + os.path.isfile(syspath(os.path.join(path, fn))): + images.append(fn) # Look for "preferred" filenames. 
images = sorted(images, @@ -658,7 +819,7 @@ class FileSystem(LocalArtSource): remaining = [] for fn in images: if re.search(cover_pat, os.path.splitext(fn)[0], re.I): - self._log.debug(u'using well-named art file {0}', + self._log.debug('using well-named art file {0}', util.displayable_path(fn)) yield self._candidate(path=os.path.join(path, fn), match=Candidate.MATCH_EXACT) @@ -667,27 +828,86 @@ class FileSystem(LocalArtSource): # Fall back to any image in the folder. if remaining and not plugin.cautious: - self._log.debug(u'using fallback art file {0}', + self._log.debug('using fallback art file {0}', util.displayable_path(remaining[0])) yield self._candidate(path=os.path.join(path, remaining[0]), match=Candidate.MATCH_FALLBACK) +class LastFM(RemoteArtSource): + NAME = "Last.fm" + + # Sizes in priority order. + SIZES = OrderedDict([ + ('mega', (300, 300)), + ('extralarge', (300, 300)), + ('large', (174, 174)), + ('medium', (64, 64)), + ('small', (34, 34)), + ]) + + API_URL = 'https://ws.audioscrobbler.com/2.0' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.key = self._config['lastfm_key'].get(), + + def get(self, album, plugin, paths): + if not album.mb_albumid: + return + + try: + response = self.request(self.API_URL, params={ + 'method': 'album.getinfo', + 'api_key': self.key, + 'mbid': album.mb_albumid, + 'format': 'json', + }) + except requests.RequestException: + self._log.debug('lastfm: error receiving response') + return + + try: + data = response.json() + + if 'error' in data: + if data['error'] == 6: + self._log.debug('lastfm: no results for {}', + album.mb_albumid) + else: + self._log.error( + 'lastfm: failed to get album info: {} ({})', + data['message'], data['error']) + else: + images = {image['size']: image['#text'] + for image in data['album']['image']} + + # Provide candidates in order of size. 
+ for size in self.SIZES.keys(): + if size in images: + yield self._candidate(url=images[size], + size=self.SIZES[size]) + except ValueError: + self._log.debug('lastfm: error loading response: {}' + .format(response.text)) + return + # Try each source in turn. -SOURCES_ALL = [u'filesystem', - u'coverart', u'itunes', u'amazon', u'albumart', - u'wikipedia', u'google', u'fanarttv'] +SOURCES_ALL = ['filesystem', + 'coverart', 'itunes', 'amazon', 'albumart', + 'wikipedia', 'google', 'fanarttv', 'lastfm'] ART_SOURCES = { - u'filesystem': FileSystem, - u'coverart': CoverArtArchive, - u'itunes': ITunesStore, - u'albumart': AlbumArtOrg, - u'amazon': Amazon, - u'wikipedia': Wikipedia, - u'google': GoogleImages, - u'fanarttv': FanartTV, + 'filesystem': FileSystem, + 'coverart': CoverArtArchive, + 'itunes': ITunesStore, + 'albumart': AlbumArtOrg, + 'amazon': Amazon, + 'wikipedia': Wikipedia, + 'google': GoogleImages, + 'fanarttv': FanartTV, + 'lastfm': LastFM, } SOURCE_NAMES = {v: k for k, v in ART_SOURCES.items()} @@ -699,7 +919,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): PAT_PERCENT = r"(100(\.00?)?|[1-9]?[0-9](\.[0-9]{1,2})?)%" def __init__(self): - super(FetchArtPlugin, self).__init__() + super().__init__() # Holds candidates corresponding to downloaded images between # fetching them and placing them in the filesystem. 
@@ -709,37 +929,47 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): 'auto': True, 'minwidth': 0, 'maxwidth': 0, + 'quality': 0, + 'max_filesize': 0, 'enforce_ratio': False, 'cautious': False, 'cover_names': ['cover', 'front', 'art', 'album', 'folder'], 'sources': ['filesystem', 'coverart', 'itunes', 'amazon', 'albumart'], 'google_key': None, - 'google_engine': u'001442825323518660753:hrh5ch1gjzm', + 'google_engine': '001442825323518660753:hrh5ch1gjzm', 'fanarttv_key': None, + 'lastfm_key': None, 'store_source': False, + 'high_resolution': False, + 'deinterlace': False, + 'cover_format': None, }) self.config['google_key'].redact = True self.config['fanarttv_key'].redact = True + self.config['lastfm_key'].redact = True self.minwidth = self.config['minwidth'].get(int) self.maxwidth = self.config['maxwidth'].get(int) + self.max_filesize = self.config['max_filesize'].get(int) + self.quality = self.config['quality'].get(int) # allow both pixel and percentage-based margin specifications self.enforce_ratio = self.config['enforce_ratio'].get( - confit.OneOf([bool, - confit.String(pattern=self.PAT_PX), - confit.String(pattern=self.PAT_PERCENT)])) + confuse.OneOf([bool, + confuse.String(pattern=self.PAT_PX), + confuse.String(pattern=self.PAT_PERCENT)])) self.margin_px = None self.margin_percent = None - if type(self.enforce_ratio) is six.text_type: - if self.enforce_ratio[-1] == u'%': + self.deinterlace = self.config['deinterlace'].get(bool) + if type(self.enforce_ratio) is str: + if self.enforce_ratio[-1] == '%': self.margin_percent = float(self.enforce_ratio[:-1]) / 100 - elif self.enforce_ratio[-2:] == u'px': + elif self.enforce_ratio[-2:] == 'px': self.margin_px = int(self.enforce_ratio[:-2]) else: # shouldn't happen - raise confit.ConfigValueError() + raise confuse.ConfigValueError() self.enforce_ratio = True cover_names = self.config['cover_names'].as_str_seq() @@ -750,17 +980,22 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): self.src_removed = 
(config['import']['delete'].get(bool) or config['import']['move'].get(bool)) + self.cover_format = self.config['cover_format'].get( + confuse.Optional(str) + ) + if self.config['auto']: # Enable two import hooks when fetching is enabled. self.import_stages = [self.fetch_art] self.register_listener('import_task_files', self.assign_art) available_sources = list(SOURCES_ALL) - if not HAVE_ITUNES and u'itunes' in available_sources: - available_sources.remove(u'itunes') if not self.config['google_key'].get() and \ - u'google' in available_sources: - available_sources.remove(u'google') + 'google' in available_sources: + available_sources.remove('google') + if not self.config['lastfm_key'].get() and \ + 'lastfm' in available_sources: + available_sources.remove('lastfm') available_sources = [(s, c) for s in available_sources for c in ART_SOURCES[s].VALID_MATCHING_CRITERIA] @@ -770,9 +1005,9 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if 'remote_priority' in self.config: self._log.warning( - u'The `fetch_art.remote_priority` configuration option has ' - u'been deprecated. Instead, place `filesystem` at the end of ' - u'your `sources` list.') + 'The `fetch_art.remote_priority` configuration option has ' + 'been deprecated. 
Instead, place `filesystem` at the end of ' + 'your `sources` list.') if self.config['remote_priority'].get(bool): fs = [] others = [] @@ -814,7 +1049,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if self.store_source: # store the source of the chosen artwork in a flexible field self._log.debug( - u"Storing art_source for {0.albumartist} - {0.album}", + "Storing art_source for {0.albumartist} - {0.album}", album) album.art_source = SOURCE_NAMES[type(candidate.source)] album.store() @@ -834,14 +1069,14 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): def commands(self): cmd = ui.Subcommand('fetchart', help='download album art') cmd.parser.add_option( - u'-f', u'--force', dest='force', + '-f', '--force', dest='force', action='store_true', default=False, - help=u're-download art when already present' + help='re-download art when already present' ) cmd.parser.add_option( - u'-q', u'--quiet', dest='quiet', + '-q', '--quiet', dest='quiet', action='store_true', default=False, - help=u'shows only quiet art' + help='quiet mode: do not output albums that already have artwork' ) def func(lib, opts, args): @@ -855,16 +1090,17 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): def art_for_album(self, album, paths, local_only=False): """Given an Album object, returns a path to downloaded art for the album (or None if no art is found). If `maxwidth`, then images are - resized to this maximum pixel size. If `local_only`, then only local - image files from the filesystem are returned; no network requests - are made. + resized to this maximum pixel size. If `quality` then resized images + are saved at the specified quality level. If `local_only`, then only + local image files from the filesystem are returned; no network + requests are made. 
""" out = None for source in self.sources: if source.IS_LOCAL or not local_only: self._log.debug( - u'trying source {0} for album {1.albumartist} - {1.album}', + 'trying source {0} for album {1.albumartist} - {1.album}', SOURCE_NAMES[type(source)], album, ) @@ -875,9 +1111,11 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if candidate.validate(self): out = candidate self._log.debug( - u'using {0.LOC_STR} image {1}'.format( + 'using {0.LOC_STR} image {1}'.format( source, util.displayable_path(out.path))) break + # Remove temporary files for invalid candidates. + source.cleanup(candidate) if out: break @@ -894,8 +1132,8 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if album.artpath and not force and os.path.isfile(album.artpath): if not quiet: message = ui.colorize('text_highlight_minor', - u'has album art') - self._log.info(u'{0}: {1}', album, message) + 'has album art') + self._log.info('{0}: {1}', album, message) else: # In ordinary invocations, look for images on the # filesystem. When forcing, however, always go to the Web @@ -905,7 +1143,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): candidate = self.art_for_album(album, local_paths) if candidate: self._set_art(album, candidate) - message = ui.colorize('text_success', u'found album art') + message = ui.colorize('text_success', 'found album art') else: - message = ui.colorize('text_error', u'no art found') - self._log.info(u'{0}: {1}', album, message) + message = ui.colorize('text_error', 'no art found') + self._log.info('{0}: {1}', album, message) diff --git a/libs/common/beetsplug/filefilter.py b/libs/common/beetsplug/filefilter.py index 23dac574..ec8fddb4 100644 --- a/libs/common/beetsplug/filefilter.py +++ b/libs/common/beetsplug/filefilter.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Malte Ried. # @@ -16,7 +15,6 @@ """Filter imported files using a regular expression. 
""" -from __future__ import division, absolute_import, print_function import re from beets import config @@ -27,7 +25,7 @@ from beets.importer import SingletonImportTask class FileFilterPlugin(BeetsPlugin): def __init__(self): - super(FileFilterPlugin, self).__init__() + super().__init__() self.register_listener('import_task_created', self.import_task_created_event) self.config.add({ @@ -43,8 +41,8 @@ class FileFilterPlugin(BeetsPlugin): bytestring_path(self.config['album_path'].get())) if 'singleton_path' in self.config: - self.path_singleton_regex = re.compile( - bytestring_path(self.config['singleton_path'].get())) + self.path_singleton_regex = re.compile( + bytestring_path(self.config['singleton_path'].get())) def import_task_created_event(self, session, task): if task.items and len(task.items) > 0: diff --git a/libs/common/beetsplug/fish.py b/libs/common/beetsplug/fish.py new file mode 100644 index 00000000..21fd67f6 --- /dev/null +++ b/libs/common/beetsplug/fish.py @@ -0,0 +1,285 @@ +# This file is part of beets. +# Copyright 2015, winters jean-marie. +# Copyright 2020, Justin Mayer +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""This plugin generates tab completions for Beets commands for the Fish shell +, including completions for Beets commands, plugin +commands, and option flags. 
Also generated are completions for all the album +and track fields, suggesting for example `genre:` or `album:` when querying the +Beets database. Completions for the *values* of those fields are not generated +by default but can be added via the `-e` / `--extravalues` flag. For example: +`beet fish -e genre -e albumartist` +""" + + +from beets.plugins import BeetsPlugin +from beets import library, ui +from beets.ui import commands +from operator import attrgetter +import os +BL_NEED2 = """complete -c beet -n '__fish_beet_needs_command' {} {}\n""" +BL_USE3 = """complete -c beet -n '__fish_beet_using_command {}' {} {}\n""" +BL_SUBS = """complete -c beet -n '__fish_at_level {} ""' {} {}\n""" +BL_EXTRA3 = """complete -c beet -n '__fish_beet_use_extra {}' {} {}\n""" + +HEAD = ''' +function __fish_beet_needs_command + set cmd (commandline -opc) + if test (count $cmd) -eq 1 + return 0 + end + return 1 +end + +function __fish_beet_using_command + set cmd (commandline -opc) + set needle (count $cmd) + if test $needle -gt 1 + if begin test $argv[1] = $cmd[2]; + and not contains -- $cmd[$needle] $FIELDS; end + return 0 + end + end + return 1 +end + +function __fish_beet_use_extra + set cmd (commandline -opc) + set needle (count $cmd) + if test $argv[2] = $cmd[$needle] + return 0 + end + return 1 +end +''' + + +class FishPlugin(BeetsPlugin): + + def commands(self): + cmd = ui.Subcommand('fish', help='generate Fish shell tab completions') + cmd.func = self.run + cmd.parser.add_option('-f', '--noFields', action='store_true', + default=False, + help='omit album/track field completions') + cmd.parser.add_option( + '-e', + '--extravalues', + action='append', + type='choice', + choices=library.Item.all_keys() + + library.Album.all_keys(), + help='include specified field *values* in completions') + return [cmd] + + def run(self, lib, opts, args): + # Gather the commands from Beets core and its plugins. + # Collect the album and track fields. 
+ # If specified, also collect the values for these fields. + # Make a giant string of all the above, formatted in a way that + # allows Fish to do tab completion for the `beet` command. + home_dir = os.path.expanduser("~") + completion_dir = os.path.join(home_dir, '.config/fish/completions') + try: + os.makedirs(completion_dir) + except OSError: + if not os.path.isdir(completion_dir): + raise + completion_file_path = os.path.join(completion_dir, 'beet.fish') + nobasicfields = opts.noFields # Do not complete for album/track fields + extravalues = opts.extravalues # e.g., Also complete artists names + beetcmds = sorted( + (commands.default_commands + + commands.plugins.commands()), + key=attrgetter('name')) + fields = sorted(set( + library.Album.all_keys() + library.Item.all_keys())) + # Collect commands, their aliases, and their help text + cmd_names_help = [] + for cmd in beetcmds: + names = list(cmd.aliases) + names.append(cmd.name) + for name in names: + cmd_names_help.append((name, cmd.help)) + # Concatenate the string + totstring = HEAD + "\n" + totstring += get_cmds_list([name[0] for name in cmd_names_help]) + totstring += '' if nobasicfields else get_standard_fields(fields) + totstring += get_extravalues(lib, extravalues) if extravalues else '' + totstring += "\n" + "# ====== {} =====".format( + "setup basic beet completion") + "\n" * 2 + totstring += get_basic_beet_options() + totstring += "\n" + "# ====== {} =====".format( + "setup field completion for subcommands") + "\n" + totstring += get_subcommands( + cmd_names_help, nobasicfields, extravalues) + # Set up completion for all the command options + totstring += get_all_commands(beetcmds) + + with open(completion_file_path, 'w') as fish_file: + fish_file.write(totstring) + + +def _escape(name): + # Escape ? 
in fish + if name == "?": + name = "\\" + name + return name + + +def get_cmds_list(cmds_names): + # Make a list of all Beets core & plugin commands + substr = '' + substr += ( + "set CMDS " + " ".join(cmds_names) + ("\n" * 2) + ) + return substr + + +def get_standard_fields(fields): + # Make a list of album/track fields and append with ':' + fields = (field + ":" for field in fields) + substr = '' + substr += ( + "set FIELDS " + " ".join(fields) + ("\n" * 2) + ) + return substr + + +def get_extravalues(lib, extravalues): + # Make a list of all values from an album/track field. + # 'beet ls albumartist: ' yields completions for ABBA, Beatles, etc. + word = '' + values_set = get_set_of_values_for_field(lib, extravalues) + for fld in extravalues: + extraname = fld.upper() + 'S' + word += ( + "set " + extraname + " " + " ".join(sorted(values_set[fld])) + + ("\n" * 2) + ) + return word + + +def get_set_of_values_for_field(lib, fields): + # Get unique values from a specified album/track field + fields_dict = {} + for each in fields: + fields_dict[each] = set() + for item in lib.items(): + for field in fields: + fields_dict[field].add(wrap(item[field])) + return fields_dict + + +def get_basic_beet_options(): + word = ( + BL_NEED2.format("-l format-item", + "-f -d 'print with custom format'") + + BL_NEED2.format("-l format-album", + "-f -d 'print with custom format'") + + BL_NEED2.format("-s l -l library", + "-f -r -d 'library database file to use'") + + BL_NEED2.format("-s d -l directory", + "-f -r -d 'destination music directory'") + + BL_NEED2.format("-s v -l verbose", + "-f -d 'print debugging information'") + + + BL_NEED2.format("-s c -l config", + "-f -r -d 'path to configuration file'") + + BL_NEED2.format("-s h -l help", + "-f -d 'print this help message and exit'")) + return word + + +def get_subcommands(cmd_name_and_help, nobasicfields, extravalues): + # Formatting for Fish to complete our fields/values + word = "" + for cmdname, cmdhelp in cmd_name_and_help: + 
cmdname = _escape(cmdname) + + word += "\n" + "# ------ {} -------".format( + "fieldsetups for " + cmdname) + "\n" + word += ( + BL_NEED2.format( + ("-a " + cmdname), + ("-f " + "-d " + wrap(clean_whitespace(cmdhelp))))) + + if nobasicfields is False: + word += ( + BL_USE3.format( + cmdname, + ("-a " + wrap("$FIELDS")), + ("-f " + "-d " + wrap("fieldname")))) + + if extravalues: + for f in extravalues: + setvar = wrap("$" + f.upper() + "S") + word += " ".join(BL_EXTRA3.format( + (cmdname + " " + f + ":"), + ('-f ' + '-A ' + '-a ' + setvar), + ('-d ' + wrap(f))).split()) + "\n" + return word + + +def get_all_commands(beetcmds): + # Formatting for Fish to complete command options + word = "" + for cmd in beetcmds: + names = list(cmd.aliases) + names.append(cmd.name) + for name in names: + name = _escape(name) + + word += "\n" + word += ("\n" * 2) + "# ====== {} =====".format( + "completions for " + name) + "\n" + + for option in cmd.parser._get_all_options()[1:]: + cmd_l = (" -l " + option._long_opts[0].replace('--', '') + )if option._long_opts else '' + cmd_s = (" -s " + option._short_opts[0].replace('-', '') + ) if option._short_opts else '' + cmd_need_arg = ' -r ' if option.nargs in [1] else '' + cmd_helpstr = (" -d " + wrap(' '.join(option.help.split())) + ) if option.help else '' + cmd_arglist = (' -a ' + wrap(" ".join(option.choices)) + ) if option.choices else '' + + word += " ".join(BL_USE3.format( + name, + (cmd_need_arg + cmd_s + cmd_l + " -f " + cmd_arglist), + cmd_helpstr).split()) + "\n" + + word = (word + " ".join(BL_USE3.format( + name, + ("-s " + "h " + "-l " + "help" + " -f "), + ('-d ' + wrap("print help") + "\n") + ).split())) + return word + + +def clean_whitespace(word): + # Remove excess whitespace and tabs in a string + return " ".join(word.split()) + + +def wrap(word): + # Need " or ' around strings but watch out if they're in the string + sptoken = '\"' + if ('"') in word and ("'") in word: + word.replace('"', sptoken) + return '"' + word + 
'"' + + tok = '"' if "'" in word else "'" + return tok + word + tok diff --git a/libs/common/beetsplug/freedesktop.py b/libs/common/beetsplug/freedesktop.py index a768be2d..ba4d5879 100644 --- a/libs/common/beetsplug/freedesktop.py +++ b/libs/common/beetsplug/freedesktop.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Matt Lichtenberg. # @@ -16,7 +15,6 @@ """Creates freedesktop.org-compliant .directory files on an album level. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui @@ -26,12 +24,12 @@ class FreedesktopPlugin(BeetsPlugin): def commands(self): deprecated = ui.Subcommand( "freedesktop", - help=u"Print a message to redirect to thumbnails --dolphin") + help="Print a message to redirect to thumbnails --dolphin") deprecated.func = self.deprecation_message return [deprecated] def deprecation_message(self, lib, opts, args): - ui.print_(u"This plugin is deprecated. Its functionality is " - u"superseded by the 'thumbnails' plugin") - ui.print_(u"'thumbnails --dolphin' replaces freedesktop. See doc & " - u"changelog for more information") + ui.print_("This plugin is deprecated. Its functionality is " + "superseded by the 'thumbnails' plugin") + ui.print_("'thumbnails --dolphin' replaces freedesktop. See doc & " + "changelog for more information") diff --git a/libs/common/beetsplug/fromfilename.py b/libs/common/beetsplug/fromfilename.py index 56b68f75..55684a27 100644 --- a/libs/common/beetsplug/fromfilename.py +++ b/libs/common/beetsplug/fromfilename.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Jan-Erik Dahlin # @@ -16,13 +15,11 @@ """If the title is empty, try to extract track and title from the filename. 
""" -from __future__ import division, absolute_import, print_function from beets import plugins from beets.util import displayable_path import os import re -import six # Filename field extraction patterns. @@ -124,7 +121,7 @@ def apply_matches(d): # Apply the title and track. for item in d: if bad_title(item.title): - item.title = six.text_type(d[item][title_field]) + item.title = str(d[item][title_field]) if 'track' in d[item] and item.track == 0: item.track = int(d[item]['track']) @@ -133,7 +130,7 @@ def apply_matches(d): class FromFilenamePlugin(plugins.BeetsPlugin): def __init__(self): - super(FromFilenamePlugin, self).__init__() + super().__init__() self.register_listener('import_task_start', filename_task) diff --git a/libs/common/beetsplug/ftintitle.py b/libs/common/beetsplug/ftintitle.py index 9303f9cf..57863d2b 100644 --- a/libs/common/beetsplug/ftintitle.py +++ b/libs/common/beetsplug/ftintitle.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Verrus, # @@ -15,7 +14,6 @@ """Moves "featured" artists to the title from the artist field. """ -from __future__ import division, absolute_import, print_function import re @@ -75,22 +73,22 @@ def find_feat_part(artist, albumartist): class FtInTitlePlugin(plugins.BeetsPlugin): def __init__(self): - super(FtInTitlePlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, 'drop': False, - 'format': u'feat. {0}', + 'format': 'feat. 
{0}', }) self._command = ui.Subcommand( 'ftintitle', - help=u'move featured artists to the title field') + help='move featured artists to the title field') self._command.parser.add_option( - u'-d', u'--drop', dest='drop', + '-d', '--drop', dest='drop', action='store_true', default=None, - help=u'drop featuring from artists and ignore title update') + help='drop featuring from artists and ignore title update') if self.config['auto']: self.import_stages = [self.imported] @@ -127,7 +125,7 @@ class FtInTitlePlugin(plugins.BeetsPlugin): remove it from the artist field. """ # In all cases, update the artist fields. - self._log.info(u'artist: {0} -> {1}', item.artist, item.albumartist) + self._log.info('artist: {0} -> {1}', item.artist, item.albumartist) item.artist = item.albumartist if item.artist_sort: # Just strip the featured artist from the sort name. @@ -138,8 +136,8 @@ class FtInTitlePlugin(plugins.BeetsPlugin): if not drop_feat and not contains_feat(item.title): feat_format = self.config['format'].as_str() new_format = feat_format.format(feat_part) - new_title = u"{0} {1}".format(item.title, new_format) - self._log.info(u'title: {0} -> {1}', item.title, new_title) + new_title = f"{item.title} {new_format}" + self._log.info('title: {0} -> {1}', item.title, new_title) item.title = new_title def ft_in_title(self, item, drop_feat): @@ -165,4 +163,4 @@ class FtInTitlePlugin(plugins.BeetsPlugin): if feat_part: self.update_metadata(item, feat_part, drop_feat) else: - self._log.info(u'no featuring artists found') + self._log.info('no featuring artists found') diff --git a/libs/common/beetsplug/fuzzy.py b/libs/common/beetsplug/fuzzy.py index a7308a52..41829639 100644 --- a/libs/common/beetsplug/fuzzy.py +++ b/libs/common/beetsplug/fuzzy.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Philippe Mongeau. # @@ -16,7 +15,6 @@ """Provides a fuzzy matching query. 
""" -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.dbcore.query import StringFieldQuery @@ -37,7 +35,7 @@ class FuzzyQuery(StringFieldQuery): class FuzzyPlugin(BeetsPlugin): def __init__(self): - super(FuzzyPlugin, self).__init__() + super().__init__() self.config.add({ 'prefix': '~', 'threshold': 0.7, diff --git a/libs/common/beetsplug/gmusic.py b/libs/common/beetsplug/gmusic.py index 259d2725..844234f9 100644 --- a/libs/common/beetsplug/gmusic.py +++ b/libs/common/beetsplug/gmusic.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2017, Tigran Kostandyan. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,84 +11,15 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""Upload files to Google Play Music and list songs in its library.""" - -from __future__ import absolute_import, division, print_function -import os.path +"""Deprecation warning for the removed gmusic plugin.""" from beets.plugins import BeetsPlugin -from beets import ui -from beets import config -from beets.ui import Subcommand -from gmusicapi import Musicmanager, Mobileclient -from gmusicapi.exceptions import NotLoggedIn -import gmusicapi.clients class Gmusic(BeetsPlugin): def __init__(self): - super(Gmusic, self).__init__() - # Checks for OAuth2 credentials, - # if they don't exist - performs authorization - self.m = Musicmanager() - if os.path.isfile(gmusicapi.clients.OAUTH_FILEPATH): - self.m.login() - else: - self.m.perform_oauth() + super().__init__() - def commands(self): - gupload = Subcommand('gmusic-upload', - help=u'upload your tracks to Google Play Music') - gupload.func = self.upload - - search = Subcommand('gmusic-songs', - help=u'list of songs in Google Play Music library' - ) - 
search.parser.add_option('-t', '--track', dest='track', - action='store_true', - help='Search by track name') - search.parser.add_option('-a', '--artist', dest='artist', - action='store_true', - help='Search by artist') - search.func = self.search - return [gupload, search] - - def upload(self, lib, opts, args): - items = lib.items(ui.decargs(args)) - files = [x.path.decode('utf-8') for x in items] - ui.print_(u'Uploading your files...') - self.m.upload(filepaths=files) - ui.print_(u'Your files were successfully added to library') - - def search(self, lib, opts, args): - password = config['gmusic']['password'] - email = config['gmusic']['email'] - password.redact = True - email.redact = True - # Since Musicmanager doesn't support library management - # we need to use mobileclient interface - mobile = Mobileclient() - try: - mobile.login(email.as_str(), password.as_str(), - Mobileclient.FROM_MAC_ADDRESS) - files = mobile.get_all_songs() - except NotLoggedIn: - ui.print_( - u'Authentication error. Please check your email and password.' - ) - return - if not args: - for i, file in enumerate(files, start=1): - print(i, ui.colorize('blue', file['artist']), - file['title'], ui.colorize('red', file['album'])) - else: - if opts.track: - self.match(files, args, 'title') - else: - self.match(files, args, 'artist') - - @staticmethod - def match(files, args, search_by): - for file in files: - if ' '.join(ui.decargs(args)) in file[search_by]: - print(file['artist'], file['title'], file['album']) + self._log.warning("The 'gmusic' plugin has been removed following the" + " shutdown of Google Play Music. Remove the plugin" + " from your configuration to silence this warning.") diff --git a/libs/common/beetsplug/hook.py b/libs/common/beetsplug/hook.py index b6270fd5..0fe3bffc 100644 --- a/libs/common/beetsplug/hook.py +++ b/libs/common/beetsplug/hook.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2015, Adrian Sampson. 
# @@ -14,14 +13,13 @@ # included in all copies or substantial portions of the Software. """Allows custom commands to be run when an event is emitted by beets""" -from __future__ import division, absolute_import, print_function import string import subprocess -import six +import shlex from beets.plugins import BeetsPlugin -from beets.util import shlex_split, arg_encoding +from beets.util import arg_encoding class CodingFormatter(string.Formatter): @@ -46,13 +44,11 @@ class CodingFormatter(string.Formatter): See str.format and string.Formatter.format. """ - try: + if isinstance(format_string, bytes): format_string = format_string.decode(self._coding) - except UnicodeEncodeError: - pass - return super(CodingFormatter, self).format(format_string, *args, - **kwargs) + return super().format(format_string, *args, + **kwargs) def convert_field(self, value, conversion): """Converts the provided value given a conversion type. @@ -61,8 +57,8 @@ class CodingFormatter(string.Formatter): See string.Formatter.convert_field. """ - converted = super(CodingFormatter, self).convert_field(value, - conversion) + converted = super().convert_field(value, + conversion) if isinstance(converted, bytes): return converted.decode(self._coding) @@ -72,8 +68,9 @@ class CodingFormatter(string.Formatter): class HookPlugin(BeetsPlugin): """Allows custom commands to be run when an event is emitted by beets""" + def __init__(self): - super(HookPlugin, self).__init__() + super().__init__() self.config.add({ 'hooks': [] @@ -91,28 +88,28 @@ class HookPlugin(BeetsPlugin): def create_and_register_hook(self, event, command): def hook_function(**kwargs): - if command is None or len(command) == 0: - self._log.error('invalid command "{0}"', command) - return + if command is None or len(command) == 0: + self._log.error('invalid command "{0}"', command) + return - # Use a string formatter that works on Unicode strings. 
- if six.PY2: - formatter = CodingFormatter(arg_encoding()) - else: - formatter = string.Formatter() + # Use a string formatter that works on Unicode strings. + formatter = CodingFormatter(arg_encoding()) - command_pieces = shlex_split(command) + command_pieces = shlex.split(command) - for i, piece in enumerate(command_pieces): - command_pieces[i] = formatter.format(piece, event=event, - **kwargs) + for i, piece in enumerate(command_pieces): + command_pieces[i] = formatter.format(piece, event=event, + **kwargs) - self._log.debug(u'running command "{0}" for event {1}', - u' '.join(command_pieces), event) + self._log.debug('running command "{0}" for event {1}', + ' '.join(command_pieces), event) - try: - subprocess.Popen(command_pieces).wait() - except OSError as exc: - self._log.error(u'hook for {0} failed: {1}', event, exc) + try: + subprocess.check_call(command_pieces) + except subprocess.CalledProcessError as exc: + self._log.error('hook for {0} exited with status {1}', + event, exc.returncode) + except OSError as exc: + self._log.error('hook for {0} failed: {1}', event, exc) self.register_listener(event, hook_function) diff --git a/libs/common/beetsplug/ihate.py b/libs/common/beetsplug/ihate.py index 6ed250fe..91850e09 100644 --- a/libs/common/beetsplug/ihate.py +++ b/libs/common/beetsplug/ihate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Blemjhoo Tezoulbr . # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function """Warns you about things you hate (or even blocks import).""" @@ -33,14 +31,14 @@ def summary(task): object. 
""" if task.is_album: - return u'{0} - {1}'.format(task.cur_artist, task.cur_album) + return f'{task.cur_artist} - {task.cur_album}' else: - return u'{0} - {1}'.format(task.item.artist, task.item.title) + return f'{task.item.artist} - {task.item.title}' class IHatePlugin(BeetsPlugin): def __init__(self): - super(IHatePlugin, self).__init__() + super().__init__() self.register_listener('import_task_choice', self.import_task_choice_event) self.config.add({ @@ -69,14 +67,14 @@ class IHatePlugin(BeetsPlugin): if task.choice_flag == action.APPLY: if skip_queries or warn_queries: - self._log.debug(u'processing your hate') + self._log.debug('processing your hate') if self.do_i_hate_this(task, skip_queries): task.choice_flag = action.SKIP - self._log.info(u'skipped: {0}', summary(task)) + self._log.info('skipped: {0}', summary(task)) return if self.do_i_hate_this(task, warn_queries): - self._log.info(u'you may hate this: {0}', summary(task)) + self._log.info('you may hate this: {0}', summary(task)) else: - self._log.debug(u'nothing to do') + self._log.debug('nothing to do') else: - self._log.debug(u'user made a decision, nothing to do') + self._log.debug('user made a decision, nothing to do') diff --git a/libs/common/beetsplug/importadded.py b/libs/common/beetsplug/importadded.py index 36407b14..e6665e0f 100644 --- a/libs/common/beetsplug/importadded.py +++ b/libs/common/beetsplug/importadded.py @@ -1,11 +1,8 @@ -# -*- coding: utf-8 -*- - """Populate an item's `added` and `mtime` fields by using the file modification time (mtime) of the item's source file before import. Reimported albums and items are skipped. 
""" -from __future__ import division, absolute_import, print_function import os @@ -16,7 +13,7 @@ from beets.plugins import BeetsPlugin class ImportAddedPlugin(BeetsPlugin): def __init__(self): - super(ImportAddedPlugin, self).__init__() + super().__init__() self.config.add({ 'preserve_mtimes': False, 'preserve_write_mtimes': False, @@ -27,7 +24,7 @@ class ImportAddedPlugin(BeetsPlugin): # album.path for old albums that were replaced by a reimported album self.replaced_album_paths = None # item path in the library to the mtime of the source file - self.item_mtime = dict() + self.item_mtime = {} register = self.register_listener register('import_task_created', self.check_config) @@ -53,8 +50,8 @@ class ImportAddedPlugin(BeetsPlugin): def record_if_inplace(self, task, session): if not (session.config['copy'] or session.config['move'] or session.config['link'] or session.config['hardlink']): - self._log.debug(u"In place import detected, recording mtimes from " - u"source paths") + self._log.debug("In place import detected, recording mtimes from " + "source paths") items = [task.item] \ if isinstance(task, importer.SingletonImportTask) \ else task.items @@ -62,9 +59,9 @@ class ImportAddedPlugin(BeetsPlugin): self.record_import_mtime(item, item.path, item.path) def record_reimported(self, task, session): - self.reimported_item_ids = set(item.id for item, replaced_items - in task.replaced_items.items() - if replaced_items) + self.reimported_item_ids = {item.id for item, replaced_items + in task.replaced_items.items() + if replaced_items} self.replaced_album_paths = set(task.replaced_albums.keys()) def write_file_mtime(self, path, mtime): @@ -86,14 +83,14 @@ class ImportAddedPlugin(BeetsPlugin): """ mtime = os.stat(util.syspath(source)).st_mtime self.item_mtime[destination] = mtime - self._log.debug(u"Recorded mtime {0} for item '{1}' imported from " - u"'{2}'", mtime, util.displayable_path(destination), + self._log.debug("Recorded mtime {0} for item '{1}' imported from " 
+ "'{2}'", mtime, util.displayable_path(destination), util.displayable_path(source)) def update_album_times(self, lib, album): if self.reimported_album(album): - self._log.debug(u"Album '{0}' is reimported, skipping import of " - u"added dates for the album and its items.", + self._log.debug("Album '{0}' is reimported, skipping import of " + "added dates for the album and its items.", util.displayable_path(album.path)) return @@ -106,30 +103,30 @@ class ImportAddedPlugin(BeetsPlugin): self.write_item_mtime(item, mtime) item.store() album.added = min(album_mtimes) - self._log.debug(u"Import of album '{0}', selected album.added={1} " - u"from item file mtimes.", album.album, album.added) + self._log.debug("Import of album '{0}', selected album.added={1} " + "from item file mtimes.", album.album, album.added) album.store() def update_item_times(self, lib, item): if self.reimported_item(item): - self._log.debug(u"Item '{0}' is reimported, skipping import of " - u"added date.", util.displayable_path(item.path)) + self._log.debug("Item '{0}' is reimported, skipping import of " + "added date.", util.displayable_path(item.path)) return mtime = self.item_mtime.pop(item.path, None) if mtime: item.added = mtime if self.config['preserve_mtimes'].get(bool): self.write_item_mtime(item, mtime) - self._log.debug(u"Import of item '{0}', selected item.added={1}", + self._log.debug("Import of item '{0}', selected item.added={1}", util.displayable_path(item.path), item.added) item.store() - def update_after_write_time(self, item): + def update_after_write_time(self, item, path): """Update the mtime of the item's file with the item.added value after each write of the item if `preserve_write_mtimes` is enabled. 
""" if item.added: if self.config['preserve_write_mtimes'].get(bool): self.write_item_mtime(item, item.added) - self._log.debug(u"Write of item '{0}', selected item.added={1}", + self._log.debug("Write of item '{0}', selected item.added={1}", util.displayable_path(item.path), item.added) diff --git a/libs/common/beetsplug/importfeeds.py b/libs/common/beetsplug/importfeeds.py index 35ae2883..ad6d8415 100644 --- a/libs/common/beetsplug/importfeeds.py +++ b/libs/common/beetsplug/importfeeds.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Fabrice Laporte. # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function """Write paths of imported files in various formats to ease later import in a music player. Also allow printing the new file locations to stdout in case @@ -54,11 +52,11 @@ def _write_m3u(m3u_path, items_paths): class ImportFeedsPlugin(BeetsPlugin): def __init__(self): - super(ImportFeedsPlugin, self).__init__() + super().__init__() self.config.add({ 'formats': [], - 'm3u_name': u'imported.m3u', + 'm3u_name': 'imported.m3u', 'dir': None, 'relative_to': None, 'absolute_path': False, @@ -118,9 +116,9 @@ class ImportFeedsPlugin(BeetsPlugin): link(path, dest) if 'echo' in formats: - self._log.info(u"Location of imported music:") + self._log.info("Location of imported music:") for path in paths: - self._log.info(u" {0}", path) + self._log.info(" {0}", path) def album_imported(self, lib, album): self._record_items(lib, album.album, album.items()) diff --git a/libs/common/beetsplug/info.py b/libs/common/beetsplug/info.py index 0d40c597..1e6d4b32 100644 --- a/libs/common/beetsplug/info.py +++ b/libs/common/beetsplug/info.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. 
# @@ -16,19 +15,17 @@ """Shows file metadata. """ -from __future__ import division, absolute_import, print_function import os -import re from beets.plugins import BeetsPlugin from beets import ui -from beets import mediafile +import mediafile from beets.library import Item from beets.util import displayable_path, normpath, syspath -def tag_data(lib, args): +def tag_data(lib, args, album=False): query = [] for arg in args: path = normpath(arg) @@ -42,15 +39,29 @@ def tag_data(lib, args): yield tag_data_emitter(item.path) +def tag_fields(): + fields = set(mediafile.MediaFile.readable_fields()) + fields.add('art') + return fields + + def tag_data_emitter(path): - def emitter(): - fields = list(mediafile.MediaFile.readable_fields()) - fields.remove('images') + def emitter(included_keys): + if included_keys == '*': + fields = tag_fields() + else: + fields = included_keys + if 'images' in fields: + # We can't serialize the image data. + fields.remove('images') mf = mediafile.MediaFile(syspath(path)) tags = {} for field in fields: - tags[field] = getattr(mf, field) - tags['art'] = mf.art is not None + if field == 'art': + tags[field] = mf.art is not None + else: + tags[field] = getattr(mf, field, None) + # create a temporary Item to take advantage of __format__ item = Item.from_path(syspath(path)) @@ -58,15 +69,14 @@ def tag_data_emitter(path): return emitter -def library_data(lib, args): - for item in lib.items(args): +def library_data(lib, args, album=False): + for item in lib.albums(args) if album else lib.items(args): yield library_data_emitter(item) def library_data_emitter(item): - def emitter(): - data = dict(item.formatted()) - data.pop('path', None) # path is fetched from item + def emitter(included_keys): + data = dict(item.formatted(included_keys=included_keys)) return data, item return emitter @@ -98,7 +108,7 @@ def print_data(data, item=None, fmt=None): formatted = {} for key, value in data.items(): if isinstance(value, list): - formatted[key] = u'; 
'.join(value) + formatted[key] = '; '.join(value) if value is not None: formatted[key] = value @@ -106,7 +116,7 @@ def print_data(data, item=None, fmt=None): return maxwidth = max(len(key) for key in formatted) - lineformat = u'{{0:>{0}}}: {{1}}'.format(maxwidth) + lineformat = f'{{0:>{maxwidth}}}: {{1}}' if path: ui.print_(displayable_path(path)) @@ -114,7 +124,7 @@ def print_data(data, item=None, fmt=None): for field in sorted(formatted): value = formatted[field] if isinstance(value, list): - value = u'; '.join(value) + value = '; '.join(value) ui.print_(lineformat.format(field, value)) @@ -129,7 +139,7 @@ def print_data_keys(data, item=None): if len(formatted) == 0: return - line_format = u'{0}{{0}}'.format(u' ' * 4) + line_format = '{0}{{0}}'.format(' ' * 4) if path: ui.print_(displayable_path(path)) @@ -140,24 +150,28 @@ def print_data_keys(data, item=None): class InfoPlugin(BeetsPlugin): def commands(self): - cmd = ui.Subcommand('info', help=u'show file metadata') + cmd = ui.Subcommand('info', help='show file metadata') cmd.func = self.run cmd.parser.add_option( - u'-l', u'--library', action='store_true', - help=u'show library fields instead of tags', + '-l', '--library', action='store_true', + help='show library fields instead of tags', ) cmd.parser.add_option( - u'-s', u'--summarize', action='store_true', - help=u'summarize the tags of all files', + '-a', '--album', action='store_true', + help='show album fields instead of tracks (implies "--library")', ) cmd.parser.add_option( - u'-i', u'--include-keys', default=[], + '-s', '--summarize', action='store_true', + help='summarize the tags of all files', + ) + cmd.parser.add_option( + '-i', '--include-keys', default=[], action='append', dest='included_keys', - help=u'comma separated list of keys to show', + help='comma separated list of keys to show', ) cmd.parser.add_option( - u'-k', u'--keys-only', action='store_true', - help=u'show only the keys', + '-k', '--keys-only', action='store_true', + help='show 
only the keys', ) cmd.parser.add_format_option(target='item') return [cmd] @@ -176,7 +190,7 @@ class InfoPlugin(BeetsPlugin): dictionary and only prints that. If two files have different values for the same tag, the value is set to '[various]' """ - if opts.library: + if opts.library or opts.album: data_collector = library_data else: data_collector = tag_data @@ -184,18 +198,21 @@ class InfoPlugin(BeetsPlugin): included_keys = [] for keys in opts.included_keys: included_keys.extend(keys.split(',')) - key_filter = make_key_filter(included_keys) + # Drop path even if user provides it multiple times + included_keys = [k for k in included_keys if k != 'path'] first = True summary = {} - for data_emitter in data_collector(lib, ui.decargs(args)): + for data_emitter in data_collector( + lib, ui.decargs(args), + album=opts.album, + ): try: - data, item = data_emitter() - except (mediafile.UnreadableFileError, IOError) as ex: - self._log.error(u'cannot read file: {0}', ex) + data, item = data_emitter(included_keys or '*') + except (mediafile.UnreadableFileError, OSError) as ex: + self._log.error('cannot read file: {0}', ex) continue - data = key_filter(data) if opts.summarize: update_summary(summary, data) else: @@ -210,33 +227,3 @@ class InfoPlugin(BeetsPlugin): if opts.summarize: print_data(summary) - - -def make_key_filter(include): - """Return a function that filters a dictionary. - - The returned filter takes a dictionary and returns another - dictionary that only includes the key-value pairs where the key - glob-matches one of the keys in `include`. 
- """ - if not include: - return identity - - matchers = [] - for key in include: - key = re.escape(key) - key = key.replace(r'\*', '.*') - matchers.append(re.compile(key + '$')) - - def filter_(data): - filtered = dict() - for key, value in data.items(): - if any([m.match(key) for m in matchers]): - filtered[key] = value - return filtered - - return filter_ - - -def identity(val): - return val diff --git a/libs/common/beetsplug/inline.py b/libs/common/beetsplug/inline.py index fd0e9fc3..e19eaa9d 100644 --- a/libs/common/beetsplug/inline.py +++ b/libs/common/beetsplug/inline.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,25 +14,23 @@ """Allows inline path template customization code in the config file. """ -from __future__ import division, absolute_import, print_function import traceback import itertools from beets.plugins import BeetsPlugin from beets import config -import six -FUNC_NAME = u'__INLINE_FUNC__' +FUNC_NAME = '__INLINE_FUNC__' class InlineError(Exception): """Raised when a runtime error occurs in an inline expression. """ def __init__(self, code, exc): - super(InlineError, self).__init__( - (u"error in inline path field code:\n" - u"%s\n%s: %s") % (code, type(exc).__name__, six.text_type(exc)) + super().__init__( + ("error in inline path field code:\n" + "%s\n%s: %s") % (code, type(exc).__name__, str(exc)) ) @@ -41,7 +38,7 @@ def _compile_func(body): """Given Python code for a function body, return a compiled callable that invokes that code. """ - body = u'def {0}():\n {1}'.format( + body = 'def {}():\n {}'.format( FUNC_NAME, body.replace('\n', '\n ') ) @@ -53,7 +50,7 @@ def _compile_func(body): class InlinePlugin(BeetsPlugin): def __init__(self): - super(InlinePlugin, self).__init__() + super().__init__() config.add({ 'pathfields': {}, # Legacy name. @@ -64,14 +61,14 @@ class InlinePlugin(BeetsPlugin): # Item fields. 
for key, view in itertools.chain(config['item_fields'].items(), config['pathfields'].items()): - self._log.debug(u'adding item field {0}', key) + self._log.debug('adding item field {0}', key) func = self.compile_inline(view.as_str(), False) if func is not None: self.template_fields[key] = func # Album fields. for key, view in config['album_fields'].items(): - self._log.debug(u'adding album field {0}', key) + self._log.debug('adding album field {0}', key) func = self.compile_inline(view.as_str(), True) if func is not None: self.album_template_fields[key] = func @@ -84,14 +81,14 @@ class InlinePlugin(BeetsPlugin): """ # First, try compiling as a single function. try: - code = compile(u'({0})'.format(python_code), 'inline', 'eval') + code = compile(f'({python_code})', 'inline', 'eval') except SyntaxError: # Fall back to a function body. try: func = _compile_func(python_code) except SyntaxError: - self._log.error(u'syntax error in inline field definition:\n' - u'{0}', traceback.format_exc()) + self._log.error('syntax error in inline field definition:\n' + '{0}', traceback.format_exc()) return else: is_expr = False @@ -117,9 +114,13 @@ class InlinePlugin(BeetsPlugin): # For function bodies, invoke the function with values as global # variables. def _func_func(obj): + old_globals = dict(func.__globals__) func.__globals__.update(_dict_for(obj)) try: return func() except Exception as exc: raise InlineError(python_code, exc) + finally: + func.__globals__.clear() + func.__globals__.update(old_globals) return _func_func diff --git a/libs/common/beetsplug/ipfs.py b/libs/common/beetsplug/ipfs.py index 9a9d6aa5..3c42e7c8 100644 --- a/libs/common/beetsplug/ipfs.py +++ b/libs/common/beetsplug/ipfs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # # Permission is hereby granted, free of charge, to any person obtaining @@ -15,7 +14,6 @@ """Adds support for ipfs. 
Requires go-ipfs and a running ipfs daemon """ -from __future__ import division, absolute_import, print_function from beets import ui, util, library, config from beets.plugins import BeetsPlugin @@ -29,9 +27,10 @@ import tempfile class IPFSPlugin(BeetsPlugin): def __init__(self): - super(IPFSPlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, + 'nocopy': False, }) if self.config['auto']: @@ -116,12 +115,15 @@ class IPFSPlugin(BeetsPlugin): self._log.info('Adding {0} to ipfs', album_dir) - cmd = "ipfs add -q -r".split() + if self.config['nocopy']: + cmd = "ipfs add --nocopy -q -r".split() + else: + cmd = "ipfs add -q -r".split() cmd.append(album_dir) try: - output = util.command_output(cmd).split() + output = util.command_output(cmd).stdout.split() except (OSError, subprocess.CalledProcessError) as exc: - self._log.error(u'Failed to add {0}, error: {1}', album_dir, exc) + self._log.error('Failed to add {0}, error: {1}', album_dir, exc) return False length = len(output) @@ -147,6 +149,8 @@ class IPFSPlugin(BeetsPlugin): def ipfs_get(self, lib, query): query = query[0] # Check if query is a hash + # TODO: generalize to other hashes; probably use a multihash + # implementation if query.startswith("Qm") and len(query) == 46: self.ipfs_get_from_hash(lib, query) else: @@ -174,11 +178,14 @@ class IPFSPlugin(BeetsPlugin): with tempfile.NamedTemporaryFile() as tmp: self.ipfs_added_albums(lib, tmp.name) try: - cmd = "ipfs add -q ".split() + if self.config['nocopy']: + cmd = "ipfs add --nocopy -q ".split() + else: + cmd = "ipfs add -q ".split() cmd.append(tmp.name) - output = util.command_output(cmd) + output = util.command_output(cmd).stdout except (OSError, subprocess.CalledProcessError) as err: - msg = "Failed to publish library. Error: {0}".format(err) + msg = f"Failed to publish library. 
Error: {err}" self._log.error(msg) return False self._log.info("hash of library: {0}", output) @@ -190,26 +197,26 @@ class IPFSPlugin(BeetsPlugin): else: lib_name = _hash lib_root = os.path.dirname(lib.path) - remote_libs = lib_root + "/remotes" + remote_libs = os.path.join(lib_root, b"remotes") if not os.path.exists(remote_libs): try: os.makedirs(remote_libs) except OSError as e: - msg = "Could not create {0}. Error: {1}".format(remote_libs, e) + msg = f"Could not create {remote_libs}. Error: {e}" self._log.error(msg) return False - path = remote_libs + "/" + lib_name + ".db" + path = os.path.join(remote_libs, lib_name.encode() + b".db") if not os.path.exists(path): - cmd = "ipfs get {0} -o".format(_hash).split() + cmd = f"ipfs get {_hash} -o".split() cmd.append(path) try: util.command_output(cmd) except (OSError, subprocess.CalledProcessError): - self._log.error("Could not import {0}".format(_hash)) + self._log.error(f"Could not import {_hash}") return False # add all albums from remotes into a combined library - jpath = remote_libs + "/joined.db" + jpath = os.path.join(remote_libs, b"joined.db") jlib = library.Library(jpath) nlib = library.Library(path) for album in nlib.albums(): @@ -232,12 +239,12 @@ class IPFSPlugin(BeetsPlugin): fmt = config['format_album'].get() try: albums = self.query(lib, args) - except IOError: + except OSError: ui.print_("No imported libraries yet.") return for album in albums: - ui.print_(format(album, fmt), " : ", album.ipfs) + ui.print_(format(album, fmt), " : ", album.ipfs.decode()) def query(self, lib, args): rlib = self.get_remote_lib(lib) @@ -246,10 +253,10 @@ class IPFSPlugin(BeetsPlugin): def get_remote_lib(self, lib): lib_root = os.path.dirname(lib.path) - remote_libs = lib_root + "/remotes" - path = remote_libs + "/joined.db" + remote_libs = os.path.join(lib_root, b"remotes") + path = os.path.join(remote_libs, b"joined.db") if not os.path.isfile(path): - raise IOError + raise OSError return library.Library(path) def 
ipfs_added_albums(self, rlib, tmpname): @@ -276,7 +283,7 @@ class IPFSPlugin(BeetsPlugin): util._fsencoding(), 'ignore' ) # Clear current path from item - item.path = '/ipfs/{0}/{1}'.format(album.ipfs, item_path) + item.path = f'/ipfs/{album.ipfs}/{item_path}' item.id = None items.append(item) diff --git a/libs/common/beetsplug/keyfinder.py b/libs/common/beetsplug/keyfinder.py index a3fbc821..b695ab54 100644 --- a/libs/common/beetsplug/keyfinder.py +++ b/libs/common/beetsplug/keyfinder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Thomas Scholtes. # @@ -16,8 +15,8 @@ """Uses the `KeyFinder` program to add the `initial_key` field. """ -from __future__ import division, absolute_import, print_function +import os.path import subprocess from beets import ui @@ -28,11 +27,11 @@ from beets.plugins import BeetsPlugin class KeyFinderPlugin(BeetsPlugin): def __init__(self): - super(KeyFinderPlugin, self).__init__() + super().__init__() self.config.add({ - u'bin': u'KeyFinder', - u'auto': True, - u'overwrite': False, + 'bin': 'KeyFinder', + 'auto': True, + 'overwrite': False, }) if self.config['auto'].get(bool): @@ -40,7 +39,7 @@ class KeyFinderPlugin(BeetsPlugin): def commands(self): cmd = ui.Subcommand('keyfinder', - help=u'detect and add initial key from audio') + help='detect and add initial key from audio') cmd.func = self.command return [cmd] @@ -52,34 +51,45 @@ class KeyFinderPlugin(BeetsPlugin): def find_key(self, items, write=False): overwrite = self.config['overwrite'].get(bool) - bin = self.config['bin'].as_str() + command = [self.config['bin'].as_str()] + # The KeyFinder GUI program needs the -f flag before the path. + # keyfinder-cli is similar, but just wants the path with no flag. 
+ if 'keyfinder-cli' not in os.path.basename(command[0]).lower(): + command.append('-f') for item in items: if item['initial_key'] and not overwrite: continue try: - output = util.command_output([bin, '-f', - util.syspath(item.path)]) + output = util.command_output(command + [util.syspath( + item.path)]).stdout except (subprocess.CalledProcessError, OSError) as exc: - self._log.error(u'execution failed: {0}', exc) + self._log.error('execution failed: {0}', exc) continue except UnicodeEncodeError: # Workaround for Python 2 Windows bug. - # http://bugs.python.org/issue1759845 - self._log.error(u'execution failed for Unicode path: {0!r}', + # https://bugs.python.org/issue1759845 + self._log.error('execution failed for Unicode path: {0!r}', item.path) continue - key_raw = output.rsplit(None, 1)[-1] + try: + key_raw = output.rsplit(None, 1)[-1] + except IndexError: + # Sometimes keyfinder-cli returns 0 but with no key, usually + # when the file is silent or corrupt, so we log and skip. + self._log.error('no key returned for path: {0}', item.path) + continue + try: key = util.text_string(key_raw) except UnicodeDecodeError: - self._log.error(u'output is invalid UTF-8') + self._log.error('output is invalid UTF-8') continue item['initial_key'] = key - self._log.info(u'added computed initial key {0} for {1}', + self._log.info('added computed initial key {0} for {1}', key, util.displayable_path(item.path)) if write: diff --git a/libs/common/beetsplug/kodiupdate.py b/libs/common/beetsplug/kodiupdate.py index ce5cb478..2a885d2c 100644 --- a/libs/common/beetsplug/kodiupdate.py +++ b/libs/common/beetsplug/kodiupdate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2017, Pauli Kettunen. 
# @@ -23,18 +22,16 @@ Put something like the following in your config.yaml to configure: user: user pwd: secret """ -from __future__ import division, absolute_import, print_function import requests from beets import config from beets.plugins import BeetsPlugin -import six def update_kodi(host, port, user, password): """Sends request to the Kodi api to start a library refresh. """ - url = "http://{0}:{1}/jsonrpc".format(host, port) + url = f"http://{host}:{port}/jsonrpc" """Content-Type: application/json is mandatory according to the kodi jsonrpc documentation""" @@ -54,14 +51,14 @@ def update_kodi(host, port, user, password): class KodiUpdate(BeetsPlugin): def __init__(self): - super(KodiUpdate, self).__init__() + super().__init__() # Adding defaults. config['kodi'].add({ - u'host': u'localhost', - u'port': 8080, - u'user': u'kodi', - u'pwd': u'kodi'}) + 'host': 'localhost', + 'port': 8080, + 'user': 'kodi', + 'pwd': 'kodi'}) config['kodi']['pwd'].redact = True self.register_listener('database_change', self.listen_for_db_change) @@ -73,7 +70,7 @@ class KodiUpdate(BeetsPlugin): def update(self, lib): """When the client exists try to send refresh request to Kodi server. """ - self._log.info(u'Requesting a Kodi library update...') + self._log.info('Requesting a Kodi library update...') # Try to send update request. 
try: @@ -85,14 +82,14 @@ class KodiUpdate(BeetsPlugin): r.raise_for_status() except requests.exceptions.RequestException as e: - self._log.warning(u'Kodi update failed: {0}', - six.text_type(e)) + self._log.warning('Kodi update failed: {0}', + str(e)) return json = r.json() if json.get('result') != 'OK': - self._log.warning(u'Kodi update failed: JSON response was {0!r}', + self._log.warning('Kodi update failed: JSON response was {0!r}', json) return - self._log.info(u'Kodi update triggered') + self._log.info('Kodi update triggered') diff --git a/libs/common/beetsplug/lastgenre/__init__.py b/libs/common/beetsplug/lastgenre/__init__.py index 4374310b..05412308 100644 --- a/libs/common/beetsplug/lastgenre/__init__.py +++ b/libs/common/beetsplug/lastgenre/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,8 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function -import six """Gets genres for imported music based on Last.fm tags. 
@@ -46,7 +43,7 @@ PYLAST_EXCEPTIONS = ( ) REPLACE = { - u'\u2010': '-', + '\u2010': '-', } @@ -73,7 +70,7 @@ def flatten_tree(elem, path, branches): for sub in elem: flatten_tree(sub, path, branches) else: - branches.append(path + [six.text_type(elem)]) + branches.append(path + [str(elem)]) def find_parents(candidate, branches): @@ -97,7 +94,7 @@ C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml') class LastGenrePlugin(plugins.BeetsPlugin): def __init__(self): - super(LastGenrePlugin, self).__init__() + super().__init__() self.config.add({ 'whitelist': True, @@ -108,8 +105,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): 'source': 'album', 'force': True, 'auto': True, - 'separator': u', ', + 'separator': ', ', 'prefer_specific': False, + 'title_case': True, }) self.setup() @@ -132,18 +130,27 @@ class LastGenrePlugin(plugins.BeetsPlugin): with open(wl_filename, 'rb') as f: for line in f: line = line.decode('utf-8').strip().lower() - if line and not line.startswith(u'#'): + if line and not line.startswith('#'): self.whitelist.add(line) # Read the genres tree for canonicalization if enabled. self.c14n_branches = [] c14n_filename = self.config['canonical'].get() - if c14n_filename in (True, ''): # Default tree. 
+ self.canonicalize = c14n_filename is not False + + # Default tree + if c14n_filename in (True, ''): c14n_filename = C14N_TREE + elif not self.canonicalize and self.config['prefer_specific'].get(): + # prefer_specific requires a tree, load default tree + c14n_filename = C14N_TREE + + # Read the tree if c14n_filename: + self._log.debug('Loading canonicalization tree {0}', c14n_filename) c14n_filename = normpath(c14n_filename) with codecs.open(c14n_filename, 'r', encoding='utf-8') as f: - genres_tree = yaml.load(f) + genres_tree = yaml.safe_load(f) flatten_tree(genres_tree, [], self.c14n_branches) @property @@ -186,7 +193,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): return None count = self.config['count'].get(int) - if self.c14n_branches: + if self.canonicalize: # Extend the list to consider tags parents in the c14n tree tags_all = [] for tag in tags: @@ -214,12 +221,17 @@ class LastGenrePlugin(plugins.BeetsPlugin): # c14n only adds allowed genres but we may have had forbidden genres in # the original tags list - tags = [x.title() for x in tags if self._is_allowed(x)] + tags = [self._format_tag(x) for x in tags if self._is_allowed(x)] return self.config['separator'].as_str().join( tags[:self.config['count'].get(int)] ) + def _format_tag(self, tag): + if self.config["title_case"]: + return tag.title() + return tag + def fetch_genre(self, lastfm_obj): """Return the genre for a pylast entity or None if no suitable genre can be found. Ex. 'Electronic, House, Dance' @@ -251,8 +263,8 @@ class LastGenrePlugin(plugins.BeetsPlugin): if any(not s for s in args): return None - key = u'{0}.{1}'.format(entity, - u'-'.join(six.text_type(a) for a in args)) + key = '{}.{}'.format(entity, + '-'.join(str(a) for a in args)) if key in self._genre_cache: return self._genre_cache[key] else: @@ -270,28 +282,28 @@ class LastGenrePlugin(plugins.BeetsPlugin): """Return the album genre for this Item or Album. 
""" return self._last_lookup( - u'album', LASTFM.get_album, obj.albumartist, obj.album + 'album', LASTFM.get_album, obj.albumartist, obj.album ) def fetch_album_artist_genre(self, obj): """Return the album artist genre for this Item or Album. """ return self._last_lookup( - u'artist', LASTFM.get_artist, obj.albumartist + 'artist', LASTFM.get_artist, obj.albumartist ) def fetch_artist_genre(self, item): """Returns the track artist genre for this Item. """ return self._last_lookup( - u'artist', LASTFM.get_artist, item.artist + 'artist', LASTFM.get_artist, item.artist ) def fetch_track_genre(self, obj): """Returns the track genre for this Item. """ return self._last_lookup( - u'track', LASTFM.get_track, obj.artist, obj.title + 'track', LASTFM.get_track, obj.artist, obj.title ) def _get_genre(self, obj): @@ -361,38 +373,56 @@ class LastGenrePlugin(plugins.BeetsPlugin): return None, None def commands(self): - lastgenre_cmd = ui.Subcommand('lastgenre', help=u'fetch genres') + lastgenre_cmd = ui.Subcommand('lastgenre', help='fetch genres') lastgenre_cmd.parser.add_option( - u'-f', u'--force', dest='force', - action='store_true', default=False, - help=u're-download genre when already present' + '-f', '--force', dest='force', + action='store_true', + help='re-download genre when already present' ) lastgenre_cmd.parser.add_option( - u'-s', u'--source', dest='source', type='string', - help=u'genre source: artist, album, or track' + '-s', '--source', dest='source', type='string', + help='genre source: artist, album, or track' ) + lastgenre_cmd.parser.add_option( + '-A', '--items', action='store_false', dest='album', + help='match items instead of albums') + lastgenre_cmd.parser.add_option( + '-a', '--albums', action='store_true', dest='album', + help='match albums instead of items') + lastgenre_cmd.parser.set_defaults(album=True) def lastgenre_func(lib, opts, args): write = ui.should_write() self.config.set_args(opts) - for album in lib.albums(ui.decargs(args)): - album.genre, 
src = self._get_genre(album) - self._log.info(u'genre for album {0} ({1}): {0.genre}', - album, src) - album.store() + if opts.album: + # Fetch genres for whole albums + for album in lib.albums(ui.decargs(args)): + album.genre, src = self._get_genre(album) + self._log.info('genre for album {0} ({1}): {0.genre}', + album, src) + album.store() - for item in album.items(): - # If we're using track-level sources, also look up each - # track on the album. - if 'track' in self.sources: - item.genre, src = self._get_genre(item) - item.store() - self._log.info(u'genre for track {0} ({1}): {0.genre}', - item, src) + for item in album.items(): + # If we're using track-level sources, also look up each + # track on the album. + if 'track' in self.sources: + item.genre, src = self._get_genre(item) + item.store() + self._log.info( + 'genre for track {0} ({1}): {0.genre}', + item, src) - if write: - item.try_write() + if write: + item.try_write() + else: + # Just query singletons, i.e. items that are not part of + # an album + for item in lib.items(ui.decargs(args)): + item.genre, src = self._get_genre(item) + self._log.debug('added last.fm item genre ({0}): {1}', + src, item.genre) + item.store() lastgenre_cmd.func = lastgenre_func return [lastgenre_cmd] @@ -402,21 +432,21 @@ class LastGenrePlugin(plugins.BeetsPlugin): if task.is_album: album = task.album album.genre, src = self._get_genre(album) - self._log.debug(u'added last.fm album genre ({0}): {1}', + self._log.debug('added last.fm album genre ({0}): {1}', src, album.genre) album.store() if 'track' in self.sources: for item in album.items(): item.genre, src = self._get_genre(item) - self._log.debug(u'added last.fm item genre ({0}): {1}', + self._log.debug('added last.fm item genre ({0}): {1}', src, item.genre) item.store() else: item = task.item item.genre, src = self._get_genre(item) - self._log.debug(u'added last.fm item genre ({0}): {1}', + self._log.debug('added last.fm item genre ({0}): {1}', src, item.genre) 
item.store() @@ -438,12 +468,12 @@ class LastGenrePlugin(plugins.BeetsPlugin): try: res = obj.get_top_tags() except PYLAST_EXCEPTIONS as exc: - self._log.debug(u'last.fm error: {0}', exc) + self._log.debug('last.fm error: {0}', exc) return [] except Exception as exc: # Isolate bugs in pylast. - self._log.debug(u'{}', traceback.format_exc()) - self._log.error(u'error in pylast library: {0}', exc) + self._log.debug('{}', traceback.format_exc()) + self._log.error('error in pylast library: {0}', exc) return [] # Filter by weight (optionally). diff --git a/libs/common/beetsplug/lastgenre/genres-tree.yaml b/libs/common/beetsplug/lastgenre/genres-tree.yaml index a09f7e6b..c8ae4247 100644 --- a/libs/common/beetsplug/lastgenre/genres-tree.yaml +++ b/libs/common/beetsplug/lastgenre/genres-tree.yaml @@ -648,35 +648,51 @@ - glam rock - hard rock - heavy metal: - - alternative metal + - alternative metal: + - funk metal - black metal: - viking metal - christian metal - death metal: + - death/doom - goregrind - melodic death metal - technical death metal - - doom metal + - doom metal: + - epic doom metal + - funeral doom - drone metal + - epic metal - folk metal: - celtic metal - medieval metal + - pagan metal - funk metal - glam metal - gothic metal + - industrial metal: + - industrial death metal - metalcore: - deathcore - mathcore: - djent - - power metal + - synthcore + - neoclassical metal + - post-metal + - power metal: + - progressive power metal - progressive metal - sludge metal - speed metal - - stoner rock + - stoner rock: + - stoner metal - symphonic metal - thrash metal: - crossover thrash - groove metal + - progressive thrash metal + - teutonic thrash metal + - traditional heavy metal - math rock - new wave: - world fusion @@ -719,6 +735,7 @@ - street punk - thrashcore - horror punk + - oi! 
- pop punk - psychobilly - riot grrrl diff --git a/libs/common/beetsplug/lastgenre/genres.txt b/libs/common/beetsplug/lastgenre/genres.txt index 914ee129..7ccd7ad3 100644 --- a/libs/common/beetsplug/lastgenre/genres.txt +++ b/libs/common/beetsplug/lastgenre/genres.txt @@ -450,6 +450,8 @@ emo rap emocore emotronic enka +epic doom metal +epic metal eremwu eu ethereal pop ethereal wave @@ -1024,6 +1026,7 @@ neo-medieval neo-prog neo-psychedelia neoclassical +neoclassical metal neoclassical music neofolk neotraditional country @@ -1176,8 +1179,10 @@ progressive folk progressive folk music progressive house progressive metal +progressive power metal progressive rock progressive trance +progressive thrash metal protopunk psych folk psychedelic music @@ -1396,6 +1401,7 @@ symphonic metal symphonic poem symphonic rock symphony +synthcore synthpop synthpunk t'ong guitar @@ -1428,6 +1434,7 @@ tejano tejano music tekno tembang sunda +teutonic thrash metal texas blues thai pop thillana @@ -1444,6 +1451,7 @@ toeshey togaku trad jazz traditional bluegrass +traditional heavy metal traditional pop music trallalero trance diff --git a/libs/common/beetsplug/lastimport.py b/libs/common/beetsplug/lastimport.py index d7b84b0a..16d53302 100644 --- a/libs/common/beetsplug/lastimport.py +++ b/libs/common/beetsplug/lastimport.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2016, Rafael Bodill http://github.com/rafi +# Copyright 2016, Rafael Bodill https://github.com/rafi # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. 
-from __future__ import division, absolute_import, print_function import pylast from pylast import TopItem, _extract, _number @@ -28,7 +26,7 @@ API_URL = 'https://ws.audioscrobbler.com/2.0/' class LastImportPlugin(plugins.BeetsPlugin): def __init__(self): - super(LastImportPlugin, self).__init__() + super().__init__() config['lastfm'].add({ 'user': '', 'api_key': plugins.LASTFM_KEY, @@ -43,7 +41,7 @@ class LastImportPlugin(plugins.BeetsPlugin): } def commands(self): - cmd = ui.Subcommand('lastimport', help=u'import last.fm play-count') + cmd = ui.Subcommand('lastimport', help='import last.fm play-count') def func(lib, opts, args): import_lastfm(lib, self._log) @@ -59,7 +57,7 @@ class CustomUser(pylast.User): tracks. """ def __init__(self, *args, **kwargs): - super(CustomUser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def _get_things(self, method, thing, thing_type, params=None, cacheable=True): @@ -114,9 +112,9 @@ def import_lastfm(lib, log): per_page = config['lastimport']['per_page'].get(int) if not user: - raise ui.UserError(u'You must specify a user name for lastimport') + raise ui.UserError('You must specify a user name for lastimport') - log.info(u'Fetching last.fm library for @{0}', user) + log.info('Fetching last.fm library for @{0}', user) page_total = 1 page_current = 0 @@ -125,15 +123,15 @@ def import_lastfm(lib, log): retry_limit = config['lastimport']['retry_limit'].get(int) # Iterate through a yet to be known page total count while page_current < page_total: - log.info(u'Querying page #{0}{1}...', + log.info('Querying page #{0}{1}...', page_current + 1, - '/{}'.format(page_total) if page_total > 1 else '') + f'/{page_total}' if page_total > 1 else '') for retry in range(0, retry_limit): tracks, page_total = fetch_tracks(user, page_current + 1, per_page) if page_total < 1: # It means nothing to us! 
- raise ui.UserError(u'Last.fm reported no data.') + raise ui.UserError('Last.fm reported no data.') if tracks: found, unknown = process_tracks(lib, tracks, log) @@ -141,22 +139,22 @@ def import_lastfm(lib, log): unknown_total += unknown break else: - log.error(u'ERROR: unable to read page #{0}', + log.error('ERROR: unable to read page #{0}', page_current + 1) if retry < retry_limit: log.info( - u'Retrying page #{0}... ({1}/{2} retry)', + 'Retrying page #{0}... ({1}/{2} retry)', page_current + 1, retry + 1, retry_limit ) else: - log.error(u'FAIL: unable to fetch page #{0}, ', - u'tried {1} times', page_current, retry + 1) + log.error('FAIL: unable to fetch page #{0}, ', + 'tried {1} times', page_current, retry + 1) page_current += 1 - log.info(u'... done!') - log.info(u'finished processing {0} song pages', page_total) - log.info(u'{0} unknown play-counts', unknown_total) - log.info(u'{0} play-counts imported', found_total) + log.info('... done!') + log.info('finished processing {0} song pages', page_total) + log.info('{0} unknown play-counts', unknown_total) + log.info('{0} play-counts imported', found_total) def fetch_tracks(user, page, limit): @@ -190,7 +188,7 @@ def process_tracks(lib, tracks, log): total = len(tracks) total_found = 0 total_fails = 0 - log.info(u'Received {0} tracks in this page, processing...', total) + log.info('Received {0} tracks in this page, processing...', total) for num in range(0, total): song = None @@ -201,7 +199,7 @@ def process_tracks(lib, tracks, log): if 'album' in tracks[num]: album = tracks[num]['album'].get('name', '').strip() - log.debug(u'query: {0} - {1} ({2})', artist, title, album) + log.debug('query: {0} - {1} ({2})', artist, title, album) # First try to query by musicbrainz's trackid if trackid: @@ -211,7 +209,7 @@ def process_tracks(lib, tracks, log): # If not, try just artist/title if song is None: - log.debug(u'no album match, trying by artist/title') + log.debug('no album match, trying by artist/title') query = 
dbcore.AndQuery([ dbcore.query.SubstringQuery('artist', artist), dbcore.query.SubstringQuery('title', title) @@ -220,8 +218,8 @@ def process_tracks(lib, tracks, log): # Last resort, try just replacing to utf-8 quote if song is None: - title = title.replace("'", u'\u2019') - log.debug(u'no title match, trying utf-8 single quote') + title = title.replace("'", '\u2019') + log.debug('no title match, trying utf-8 single quote') query = dbcore.AndQuery([ dbcore.query.SubstringQuery('artist', artist), dbcore.query.SubstringQuery('title', title) @@ -231,19 +229,19 @@ def process_tracks(lib, tracks, log): if song is not None: count = int(song.get('play_count', 0)) new_count = int(tracks[num]['playcount']) - log.debug(u'match: {0} - {1} ({2}) ' - u'updating: play_count {3} => {4}', + log.debug('match: {0} - {1} ({2}) ' + 'updating: play_count {3} => {4}', song.artist, song.title, song.album, count, new_count) song['play_count'] = new_count song.store() total_found += 1 else: total_fails += 1 - log.info(u' - No match: {0} - {1} ({2})', + log.info(' - No match: {0} - {1} ({2})', artist, title, album) if total_fails > 0: - log.info(u'Acquired {0}/{1} play-counts ({2} unknown)', + log.info('Acquired {0}/{1} play-counts ({2} unknown)', total_found, total, total_fails) return total_found, total_fails diff --git a/libs/common/beetsplug/loadext.py b/libs/common/beetsplug/loadext.py new file mode 100644 index 00000000..191b97a2 --- /dev/null +++ b/libs/common/beetsplug/loadext.py @@ -0,0 +1,44 @@ +# This file is part of beets. 
+# Copyright 2019, Jack Wilsdon +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Load SQLite extensions. +""" + + +from beets.dbcore import Database +from beets.plugins import BeetsPlugin +import sqlite3 + + +class LoadExtPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + if not Database.supports_extensions: + self._log.warn('loadext is enabled but the current SQLite ' + 'installation does not support extensions') + return + + self.register_listener('library_opened', self.library_opened) + + def library_opened(self, lib): + for v in self.config: + ext = v.as_filename() + + self._log.debug('loading extension {}', ext) + + try: + lib.load_extension(ext) + except sqlite3.OperationalError as e: + self._log.error('failed to load extension {}: {}', ext, e) diff --git a/libs/common/beetsplug/lyrics.py b/libs/common/beetsplug/lyrics.py index 60f53759..2cb50ca5 100644 --- a/libs/common/beetsplug/lyrics.py +++ b/libs/common/beetsplug/lyrics.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """Fetches, embeds, and displays lyrics. 
""" -from __future__ import absolute_import, division, print_function import difflib import errno @@ -29,11 +27,11 @@ import requests import unicodedata from unidecode import unidecode import warnings -import six -from six.moves import urllib +import urllib try: - from bs4 import SoupStrainer, BeautifulSoup + import bs4 + from bs4 import SoupStrainer HAS_BEAUTIFUL_SOUP = True except ImportError: HAS_BEAUTIFUL_SOUP = False @@ -48,7 +46,7 @@ try: # PY3: HTMLParseError was removed in 3.5 as strict mode # was deprecated in 3.3. # https://docs.python.org/3.3/library/html.parser.html - from six.moves.html_parser import HTMLParseError + from html.parser import HTMLParseError except ImportError: class HTMLParseError(Exception): pass @@ -62,23 +60,23 @@ COMMENT_RE = re.compile(r'', re.S) TAG_RE = re.compile(r'<[^>]*>') BREAK_RE = re.compile(r'\n?\s*]*)*>\s*\n?', re.I) URL_CHARACTERS = { - u'\u2018': u"'", - u'\u2019': u"'", - u'\u201c': u'"', - u'\u201d': u'"', - u'\u2010': u'-', - u'\u2011': u'-', - u'\u2012': u'-', - u'\u2013': u'-', - u'\u2014': u'-', - u'\u2015': u'-', - u'\u2016': u'-', - u'\u2026': u'...', + '\u2018': "'", + '\u2019': "'", + '\u201c': '"', + '\u201d': '"', + '\u2010': '-', + '\u2011': '-', + '\u2012': '-', + '\u2013': '-', + '\u2014': '-', + '\u2015': '-', + '\u2016': '-', + '\u2026': '...', } -USER_AGENT = 'beets/{}'.format(beets.__version__) +USER_AGENT = f'beets/{beets.__version__}' # The content for the base index.rst generated in ReST mode. -REST_INDEX_TEMPLATE = u'''Lyrics +REST_INDEX_TEMPLATE = '''Lyrics ====== * :ref:`Song index ` @@ -94,11 +92,11 @@ Artist index: ''' # The content for the base conf.py generated. 
-REST_CONF_TEMPLATE = u'''# -*- coding: utf-8 -*- +REST_CONF_TEMPLATE = '''# -*- coding: utf-8 -*- master_doc = 'index' -project = u'Lyrics' -copyright = u'none' -author = u'Various Authors' +project = 'Lyrics' +copyright = 'none' +author = 'Various Authors' latex_documents = [ (master_doc, 'Lyrics.tex', project, author, 'manual'), @@ -117,7 +115,7 @@ epub_tocdup = False def unichar(i): try: - return six.unichr(i) + return chr(i) except ValueError: return struct.pack('i', i).decode('utf-32') @@ -126,12 +124,12 @@ def unescape(text): """Resolve &#xxx; HTML entities (and some others).""" if isinstance(text, bytes): text = text.decode('utf-8', 'ignore') - out = text.replace(u' ', u' ') + out = text.replace(' ', ' ') def replchar(m): num = m.group(1) return unichar(int(num)) - out = re.sub(u"&#(\d+);", replchar, out) + out = re.sub("&#(\\d+);", replchar, out) return out @@ -140,43 +138,10 @@ def extract_text_between(html, start_marker, end_marker): _, html = html.split(start_marker, 1) html, _ = html.split(end_marker, 1) except ValueError: - return u'' + return '' return html -def extract_text_in(html, starttag): - """Extract the text from a
tag in the HTML starting with - ``starttag``. Returns None if parsing fails. - """ - # Strip off the leading text before opening tag. - try: - _, html = html.split(starttag, 1) - except ValueError: - return - - # Walk through balanced DIV tags. - level = 0 - parts = [] - pos = 0 - for match in DIV_RE.finditer(html): - if match.group(1): # Closing tag. - level -= 1 - if level == 0: - pos = match.end() - else: # Opening tag. - if level == 0: - parts.append(html[pos:match.start()]) - level += 1 - - if level == -1: - parts.append(html[pos:match.start()]) - break - else: - print(u'no closing tag found!') - return - return u''.join(parts) - - def search_pairs(item): """Yield a pairs of artists and titles to search for. @@ -186,6 +151,9 @@ def search_pairs(item): In addition to the artist and title obtained from the `item` the method tries to strip extra information like paranthesized suffixes and featured artists from the strings and add them as candidates. + The artist sort name is added as a fallback candidate to help in + cases where artist name includes special characters or is in a + non-latin script. The method also tries to split multiple titles separated with `/`. """ def generate_alternatives(string, patterns): @@ -199,19 +167,23 @@ def search_pairs(item): alternatives.append(match.group(1)) return alternatives - title, artist = item.title, item.artist + title, artist, artist_sort = item.title, item.artist, item.artist_sort patterns = [ # Remove any featuring artists from the artists name - r"(.*?) {0}".format(plugins.feat_tokens())] + fr"(.*?) {plugins.feat_tokens()}"] artists = generate_alternatives(artist, patterns) + # Use the artist_sort as fallback only if it differs from artist to avoid + # repeated remote requests with the same search terms + if artist != artist_sort: + artists.append(artist_sort) patterns = [ # Remove a parenthesized suffix from a title string. Common # examples include (live), (remix), and (acoustic). 
r"(.+?)\s+[(].*[)]$", # Remove any featuring artists from the title - r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)), + r"(.*?) {}".format(plugins.feat_tokens(for_artist=False)), # Remove part of title after colon ':' for songs with subtitles r"(.+?)\s*:.*"] titles = generate_alternatives(title, patterns) @@ -245,14 +217,27 @@ def slug(text): return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-') -class Backend(object): +if HAS_BEAUTIFUL_SOUP: + def try_parse_html(html, **kwargs): + try: + return bs4.BeautifulSoup(html, 'html.parser', **kwargs) + except HTMLParseError: + return None +else: + def try_parse_html(html, **kwargs): + return None + + +class Backend: + REQUIRES_BS = False + def __init__(self, config, log): self._log = log @staticmethod def _encode(s): """Encode the string for inclusion in a URL""" - if isinstance(s, six.text_type): + if isinstance(s, str): for char, repl in URL_CHARACTERS.items(): s = s.replace(char, repl) s = s.encode('utf-8', 'ignore') @@ -277,20 +262,21 @@ class Backend(object): 'User-Agent': USER_AGENT, }) except requests.RequestException as exc: - self._log.debug(u'lyrics request failed: {0}', exc) + self._log.debug('lyrics request failed: {0}', exc) return if r.status_code == requests.codes.ok: return r.text else: - self._log.debug(u'failed to fetch: {0} ({1})', url, r.status_code) + self._log.debug('failed to fetch: {0} ({1})', url, r.status_code) + return None def fetch(self, artist, title): raise NotImplementedError() -class SymbolsReplaced(Backend): +class MusiXmatch(Backend): REPLACEMENTS = { - r'\s+': '_', + r'\s+': '-', '<': 'Less_Than', '>': 'Greater_Than', '#': 'Number_', @@ -298,39 +284,40 @@ class SymbolsReplaced(Backend): r'[\]\}]': ')', } + URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + @classmethod def _encode(cls, s): for old, new in cls.REPLACEMENTS.items(): s = re.sub(old, new, s) - return super(SymbolsReplaced, cls)._encode(s) - - -class MusiXmatch(SymbolsReplaced): - 
REPLACEMENTS = dict(SymbolsReplaced.REPLACEMENTS, **{ - r'\s+': '-' - }) - - URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + return super()._encode(s) def fetch(self, artist, title): url = self.build_url(artist, title) html = self.fetch_url(url) if not html: - return + return None if "We detected that your IP is blocked" in html: - self._log.warning(u'we are blocked at MusixMatch: url %s failed' + self._log.warning('we are blocked at MusixMatch: url %s failed' % url) - return - html_part = html.split('

', '

')) + lyrics = '\n'.join(lyrics_parts) lyrics = lyrics.strip(',"').replace('\\n', '\n') # another odd case: sometimes only that string remains, for # missing songs. this seems to happen after being blocked # above, when filling in the CAPTCHA. if "Instant lyrics for all your music." in lyrics: - return + return None + # sometimes there are non-existent lyrics with some content + if 'Lyrics | Musixmatch' in lyrics: + return None return lyrics @@ -341,87 +328,171 @@ class Genius(Backend): bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/ """ + REQUIRES_BS = True + base_url = "https://api.genius.com" def __init__(self, config, log): - super(Genius, self).__init__(config, log) + super().__init__(config, log) self.api_key = config['genius_api_key'].as_str() self.headers = { 'Authorization': "Bearer %s" % self.api_key, 'User-Agent': USER_AGENT, } - def lyrics_from_song_api_path(self, song_api_path): - song_url = self.base_url + song_api_path - response = requests.get(song_url, headers=self.headers) - json = response.json() - path = json["response"]["song"]["path"] - - # Gotta go regular html scraping... come on Genius. - page_url = "https://genius.com" + path - try: - page = requests.get(page_url) - except requests.RequestException as exc: - self._log.debug(u'Genius page request for {0} failed: {1}', - page_url, exc) - return None - html = BeautifulSoup(page.text, "html.parser") - - # Remove script tags that they put in the middle of the lyrics. - [h.extract() for h in html('script')] - - # At least Genius is nice and has a tag called 'lyrics'! - # Updated css where the lyrics are based in HTML. 
- lyrics = html.find("div", class_="lyrics").get_text() - - return lyrics - def fetch(self, artist, title): - search_url = self.base_url + "/search" - data = {'q': title} - try: - response = requests.get(search_url, data=data, - headers=self.headers) - except requests.RequestException as exc: - self._log.debug(u'Genius API request failed: {0}', exc) + """Fetch lyrics from genius.com + + Because genius doesn't allow accesssing lyrics via the api, + we first query the api for a url matching our artist & title, + then attempt to scrape that url for the lyrics. + """ + json = self._search(artist, title) + if not json: + self._log.debug('Genius API request returned invalid JSON') return None - try: - json = response.json() - except ValueError: - self._log.debug(u'Genius API request returned invalid JSON') - return None - - song_info = None + # find a matching artist in the json for hit in json["response"]["hits"]: - if hit["result"]["primary_artist"]["name"] == artist: - song_info = hit - break + hit_artist = hit["result"]["primary_artist"]["name"] - if song_info: - song_api_path = song_info["result"]["api_path"] - return self.lyrics_from_song_api_path(song_api_path) + if slug(hit_artist) == slug(artist): + html = self.fetch_url(hit["result"]["url"]) + if not html: + return None + return self._scrape_lyrics_from_html(html) + self._log.debug('Genius failed to find a matching artist for \'{0}\'', + artist) + return None -class LyricsWiki(SymbolsReplaced): - """Fetch lyrics from LyricsWiki.""" + def _search(self, artist, title): + """Searches the genius api for a given artist and title - URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' + https://docs.genius.com/#search-h2 - def fetch(self, artist, title): - url = self.build_url(artist, title) - html = self.fetch_url(url) - if not html: + :returns: json response + """ + search_url = self.base_url + "/search" + data = {'q': title + " " + artist.lower()} + try: + response = requests.get( + search_url, data=data, 
headers=self.headers) + except requests.RequestException as exc: + self._log.debug('Genius API request failed: {0}', exc) + return None + + try: + return response.json() + except ValueError: + return None + + def _scrape_lyrics_from_html(self, html): + """Scrape lyrics from a given genius.com html""" + + soup = try_parse_html(html) + if not soup: return - # Get the HTML fragment inside the appropriate HTML element and then - # extract the text from it. - html_frag = extract_text_in(html, u"
") - if html_frag: - lyrics = _scrape_strip_cruft(html_frag, True) + # Remove script tags that they put in the middle of the lyrics. + [h.extract() for h in soup('script')] - if lyrics and 'Unfortunately, we are not licensed' not in lyrics: - return lyrics + # Most of the time, the page contains a div with class="lyrics" where + # all of the lyrics can be found already correctly formatted + # Sometimes, though, it packages the lyrics into separate divs, most + # likely for easier ad placement + lyrics_div = soup.find("div", class_="lyrics") + if not lyrics_div: + self._log.debug('Received unusual song page html') + verse_div = soup.find("div", + class_=re.compile("Lyrics__Container")) + if not verse_div: + if soup.find("div", + class_=re.compile("LyricsPlaceholder__Message"), + string="This song is an instrumental"): + self._log.debug('Detected instrumental') + return "[Instrumental]" + else: + self._log.debug("Couldn't scrape page using known layouts") + return None + + lyrics_div = verse_div.parent + for br in lyrics_div.find_all("br"): + br.replace_with("\n") + ads = lyrics_div.find_all("div", + class_=re.compile("InreadAd__Container")) + for ad in ads: + ad.replace_with("\n") + + return lyrics_div.get_text() + + +class Tekstowo(Backend): + # Fetch lyrics from Tekstowo.pl. 
+ REQUIRES_BS = True + + BASE_URL = 'http://www.tekstowo.pl' + URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s' + + def fetch(self, artist, title): + url = self.build_url(title, artist) + search_results = self.fetch_url(url) + if not search_results: + return None + + song_page_url = self.parse_search_results(search_results) + if not song_page_url: + return None + + song_page_html = self.fetch_url(song_page_url) + if not song_page_html: + return None + + return self.extract_lyrics(song_page_html) + + def parse_search_results(self, html): + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + soup = try_parse_html(html) + if not soup: + return None + + content_div = soup.find("div", class_="content") + if not content_div: + return None + + card_div = content_div.find("div", class_="card") + if not card_div: + return None + + song_rows = card_div.find_all("div", class_="box-przeboje") + if not song_rows: + return None + + song_row = song_rows[0] + if not song_row: + return None + + link = song_row.find('a') + if not link: + return None + + return self.BASE_URL + link.get('href') + + def extract_lyrics(self, html): + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + soup = try_parse_html(html) + if not soup: + return None + + lyrics_div = soup.find("div", class_="song-text") + if not lyrics_div: + return None + + return lyrics_div.get_text() def remove_credits(text): @@ -446,7 +517,8 @@ def _scrape_strip_cruft(html, plain_text_out=False): html = html.replace('\r', '\n') # Normalize EOL. html = re.sub(r' +', ' ', html) # Whitespaces collapse. html = BREAK_RE.sub('\n', html) #
eats up surrounding '\n'. - html = re.sub(r'<(script).*?(?s)', '', html) # Strip script tags. + html = re.sub(r'(?s)<(script).*?', '', html) # Strip script tags. + html = re.sub('\u2005', " ", html) # replace unicode with regular space if plain_text_out: # Strip remaining HTML tags html = COMMENT_RE.sub('', html) @@ -466,12 +538,6 @@ def scrape_lyrics_from_html(html): """Scrape lyrics from a URL. If no lyrics can be found, return None instead. """ - if not HAS_BEAUTIFUL_SOUP: - return None - - if not html: - return None - def is_text_notcode(text): length = len(text) return (length > 20 and @@ -481,10 +547,8 @@ def scrape_lyrics_from_html(html): html = _scrape_merge_paragraphs(html) # extract all long text blocks that are not code - try: - soup = BeautifulSoup(html, "html.parser", - parse_only=SoupStrainer(text=is_text_notcode)) - except HTMLParseError: + soup = try_parse_html(html, parse_only=SoupStrainer(text=is_text_notcode)) + if not soup: return None # Get the longest text element (if any). 
@@ -498,8 +562,10 @@ def scrape_lyrics_from_html(html): class Google(Backend): """Fetch lyrics from Google search results.""" + REQUIRES_BS = True + def __init__(self, config, log): - super(Google, self).__init__(config, log) + super().__init__(config, log) self.api_key = config['google_API_key'].as_str() self.engine_id = config['google_engine_ID'].as_str() @@ -511,7 +577,7 @@ class Google(Backend): bad_triggers_occ = [] nb_lines = text.count('\n') if nb_lines <= 1: - self._log.debug(u"Ignoring too short lyrics '{0}'", text) + self._log.debug("Ignoring too short lyrics '{0}'", text) return False elif nb_lines < 5: bad_triggers_occ.append('too_short') @@ -522,14 +588,14 @@ class Google(Backend): bad_triggers = ['lyrics', 'copyright', 'property', 'links'] if artist: - bad_triggers_occ += [artist] + bad_triggers += [artist] for item in bad_triggers: bad_triggers_occ += [item] * len(re.findall(r'\W%s\W' % item, text, re.I)) if bad_triggers_occ: - self._log.debug(u'Bad triggers detected: {0}', bad_triggers_occ) + self._log.debug('Bad triggers detected: {0}', bad_triggers_occ) return len(bad_triggers_occ) < 2 def slugify(self, text): @@ -537,14 +603,14 @@ class Google(Backend): """ text = re.sub(r"[-'_\s]", '_', text) text = re.sub(r"_+", '_', text).strip('_') - pat = "([^,\(]*)\((.*?)\)" # Remove content within parentheses - text = re.sub(pat, '\g<1>', text).strip() + pat = r"([^,\(]*)\((.*?)\)" # Remove content within parentheses + text = re.sub(pat, r'\g<1>', text).strip() try: text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore') - text = six.text_type(re.sub('[-\s]+', ' ', text.decode('utf-8'))) + text = str(re.sub(r'[-\s]+', ' ', text.decode('utf-8'))) except UnicodeDecodeError: - self._log.exception(u"Failing to normalize '{0}'", text) + self._log.exception("Failing to normalize '{0}'", text) return text BY_TRANS = ['by', 'par', 'de', 'von'] @@ -556,7 +622,7 @@ class Google(Backend): """ title = self.slugify(title.lower()) artist = 
self.slugify(artist.lower()) - sitename = re.search(u"//([^/]+)/.*", + sitename = re.search("//([^/]+)/.*", self.slugify(url_link.lower())).group(1) url_title = self.slugify(url_title.lower()) @@ -570,7 +636,7 @@ class Google(Backend): [artist, sitename, sitename.replace('www.', '')] + \ self.LYRICS_TRANS tokens = [re.escape(t) for t in tokens] - song_title = re.sub(u'(%s)' % u'|'.join(tokens), u'', url_title) + song_title = re.sub('(%s)' % '|'.join(tokens), '', url_title) song_title = song_title.strip('_|') typo_ratio = .9 @@ -578,53 +644,57 @@ class Google(Backend): return ratio >= typo_ratio def fetch(self, artist, title): - query = u"%s %s" % (artist, title) - url = u'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' \ + query = f"{artist} {title}" + url = 'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' \ % (self.api_key, self.engine_id, urllib.parse.quote(query.encode('utf-8'))) data = self.fetch_url(url) if not data: - self._log.debug(u'google backend returned no data') + self._log.debug('google backend returned no data') return None try: data = json.loads(data) except ValueError as exc: - self._log.debug(u'google backend returned malformed JSON: {}', exc) + self._log.debug('google backend returned malformed JSON: {}', exc) if 'error' in data: reason = data['error']['errors'][0]['reason'] - self._log.debug(u'google backend error: {0}', reason) + self._log.debug('google backend error: {0}', reason) return None if 'items' in data.keys(): for item in data['items']: url_link = item['link'] - url_title = item.get('title', u'') + url_title = item.get('title', '') if not self.is_page_candidate(url_link, url_title, title, artist): continue html = self.fetch_url(url_link) + if not html: + continue lyrics = scrape_lyrics_from_html(html) if not lyrics: continue if self.is_lyrics(lyrics, artist): - self._log.debug(u'got lyrics from {0}', + self._log.debug('got lyrics from {0}', item['displayLink']) return lyrics + return None + class 
LyricsPlugin(plugins.BeetsPlugin): - SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius'] + SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo'] SOURCE_BACKENDS = { 'google': Google, - 'lyricwiki': LyricsWiki, 'musixmatch': MusiXmatch, 'genius': Genius, + 'tekstowo': Tekstowo, } def __init__(self): - super(LyricsPlugin, self).__init__() + super().__init__() self.import_stages = [self.imported] self.config.add({ 'auto': True, @@ -632,7 +702,7 @@ class LyricsPlugin(plugins.BeetsPlugin): 'bing_lang_from': [], 'bing_lang_to': None, 'google_API_key': None, - 'google_engine_ID': u'009217259823014548361:lndtuqkycfu', + 'google_engine_ID': '009217259823014548361:lndtuqkycfu', 'genius_api_key': "Ryq93pUGm8bM6eUWwD_M3NOFFDAtp2yEE7W" "76V-uFL5jks5dNvcGCdarqFjDhP9c", @@ -648,7 +718,7 @@ class LyricsPlugin(plugins.BeetsPlugin): # State information for the ReST writer. # First, the current artist we're writing. - self.artist = u'Unknown artist' + self.artist = 'Unknown artist' # The current album: False means no album yet. self.album = False # The current rest file content. None means the file is not @@ -659,40 +729,44 @@ class LyricsPlugin(plugins.BeetsPlugin): sources = plugins.sanitize_choices( self.config['sources'].as_str_seq(), available_sources) + if not HAS_BEAUTIFUL_SOUP: + sources = self.sanitize_bs_sources(sources) + if 'google' in sources: if not self.config['google_API_key'].get(): # We log a *debug* message here because the default # configuration includes `google`. This way, the source # is silent by default but can be enabled just by # setting an API key. - self._log.debug(u'Disabling google source: ' - u'no API key configured.') + self._log.debug('Disabling google source: ' + 'no API key configured.') sources.remove('google') - elif not HAS_BEAUTIFUL_SOUP: - self._log.warning(u'To use the google lyrics source, you must ' - u'install the beautifulsoup4 module. 
See ' - u'the documentation for further details.') - sources.remove('google') - - if 'genius' in sources and not HAS_BEAUTIFUL_SOUP: - self._log.debug( - u'The Genius backend requires BeautifulSoup, which is not ' - u'installed, so the source is disabled.' - ) - sources.remove('genius') self.config['bing_lang_from'] = [ x.lower() for x in self.config['bing_lang_from'].as_str_seq()] self.bing_auth_token = None if not HAS_LANGDETECT and self.config['bing_client_secret'].get(): - self._log.warning(u'To use bing translations, you need to ' - u'install the langdetect module. See the ' - u'documentation for further details.') + self._log.warning('To use bing translations, you need to ' + 'install the langdetect module. See the ' + 'documentation for further details.') self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log) for source in sources] + def sanitize_bs_sources(self, sources): + enabled_sources = [] + for source in sources: + if self.SOURCE_BACKENDS[source].REQUIRES_BS: + self._log.debug('To use the %s lyrics source, you must ' + 'install the beautifulsoup4 module. See ' + 'the documentation for further details.' + % source) + else: + enabled_sources.append(source) + + return enabled_sources + def get_bing_access_token(self): params = { 'client_id': 'beets', @@ -708,30 +782,30 @@ class LyricsPlugin(plugins.BeetsPlugin): if 'access_token' in oauth_token: return "Bearer " + oauth_token['access_token'] else: - self._log.warning(u'Could not get Bing Translate API access token.' - u' Check your "bing_client_secret" password') + self._log.warning('Could not get Bing Translate API access token.' 
+ ' Check your "bing_client_secret" password') def commands(self): cmd = ui.Subcommand('lyrics', help='fetch song lyrics') cmd.parser.add_option( - u'-p', u'--print', dest='printlyr', + '-p', '--print', dest='printlyr', action='store_true', default=False, - help=u'print lyrics to console', + help='print lyrics to console', ) cmd.parser.add_option( - u'-r', u'--write-rest', dest='writerest', + '-r', '--write-rest', dest='writerest', action='store', default=None, metavar='dir', - help=u'write lyrics to given directory as ReST files', + help='write lyrics to given directory as ReST files', ) cmd.parser.add_option( - u'-f', u'--force', dest='force_refetch', + '-f', '--force', dest='force_refetch', action='store_true', default=False, - help=u'always re-download lyrics', + help='always re-download lyrics', ) cmd.parser.add_option( - u'-l', u'--local', dest='local_only', + '-l', '--local', dest='local_only', action='store_true', default=False, - help=u'do not fetch missing lyrics', + help='do not fetch missing lyrics', ) def func(lib, opts, args): @@ -740,7 +814,8 @@ class LyricsPlugin(plugins.BeetsPlugin): write = ui.should_write() if opts.writerest: self.writerest_indexes(opts.writerest) - for item in lib.items(ui.decargs(args)): + items = lib.items(ui.decargs(args)) + for item in items: if not opts.local_only and not self.config['local']: self.fetch_item_lyrics( lib, item, write, @@ -750,51 +825,55 @@ class LyricsPlugin(plugins.BeetsPlugin): if opts.printlyr: ui.print_(item.lyrics) if opts.writerest: - self.writerest(opts.writerest, item) - if opts.writerest: - # flush last artist - self.writerest(opts.writerest, None) - ui.print_(u'ReST files generated. to build, use one of:') - ui.print_(u' sphinx-build -b html %s _build/html' + self.appendrest(opts.writerest, item) + if opts.writerest and items: + # flush last artist & write to ReST + self.writerest(opts.writerest) + ui.print_('ReST files generated. 
to build, use one of:') + ui.print_(' sphinx-build -b html %s _build/html' % opts.writerest) - ui.print_(u' sphinx-build -b epub %s _build/epub' + ui.print_(' sphinx-build -b epub %s _build/epub' % opts.writerest) - ui.print_((u' sphinx-build -b latex %s _build/latex ' - u'&& make -C _build/latex all-pdf') + ui.print_((' sphinx-build -b latex %s _build/latex ' + '&& make -C _build/latex all-pdf') % opts.writerest) cmd.func = func return [cmd] - def writerest(self, directory, item): - """Write the item to an ReST file + def appendrest(self, directory, item): + """Append the item to an ReST file This will keep state (in the `rest` variable) in order to avoid writing continuously to the same files. """ - if item is None or slug(self.artist) != slug(item.albumartist): - if self.rest is not None: - path = os.path.join(directory, 'artists', - slug(self.artist) + u'.rst') - with open(path, 'wb') as output: - output.write(self.rest.encode('utf-8')) - self.rest = None - if item is None: - return + if slug(self.artist) != slug(item.albumartist): + # Write current file and start a new one ~ item.albumartist + self.writerest(directory) self.artist = item.albumartist.strip() - self.rest = u"%s\n%s\n\n.. contents::\n :local:\n\n" \ + self.rest = "%s\n%s\n\n.. 
contents::\n :local:\n\n" \ % (self.artist, - u'=' * len(self.artist)) + '=' * len(self.artist)) + if self.album != item.album: tmpalbum = self.album = item.album.strip() if self.album == '': - tmpalbum = u'Unknown album' - self.rest += u"%s\n%s\n\n" % (tmpalbum, u'-' * len(tmpalbum)) - title_str = u":index:`%s`" % item.title.strip() - block = u'| ' + item.lyrics.replace(u'\n', u'\n| ') - self.rest += u"%s\n%s\n\n%s\n\n" % (title_str, - u'~' * len(title_str), - block) + tmpalbum = 'Unknown album' + self.rest += "{}\n{}\n\n".format(tmpalbum, '-' * len(tmpalbum)) + title_str = ":index:`%s`" % item.title.strip() + block = '| ' + item.lyrics.replace('\n', '\n| ') + self.rest += "{}\n{}\n\n{}\n\n".format(title_str, + '~' * len(title_str), + block) + + def writerest(self, directory): + """Write self.rest to a ReST file + """ + if self.rest is not None and self.artist is not None: + path = os.path.join(directory, 'artists', + slug(self.artist) + '.rst') + with open(path, 'wb') as output: + output.write(self.rest.encode('utf-8')) def writerest_indexes(self, directory): """Write conf.py and index.rst files necessary for Sphinx @@ -832,7 +911,7 @@ class LyricsPlugin(plugins.BeetsPlugin): """ # Skip if the item already has lyrics. 
if not force and item.lyrics: - self._log.info(u'lyrics already present: {0}', item) + self._log.info('lyrics already present: {0}', item) return lyrics = None @@ -841,10 +920,10 @@ class LyricsPlugin(plugins.BeetsPlugin): if any(lyrics): break - lyrics = u"\n\n---\n\n".join([l for l in lyrics if l]) + lyrics = "\n\n---\n\n".join([l for l in lyrics if l]) if lyrics: - self._log.info(u'fetched lyrics: {0}', item) + self._log.info('fetched lyrics: {0}', item) if HAS_LANGDETECT and self.config['bing_client_secret'].get(): lang_from = langdetect.detect(lyrics) if self.config['bing_lang_to'].get() != lang_from and ( @@ -854,7 +933,7 @@ class LyricsPlugin(plugins.BeetsPlugin): lyrics = self.append_translation( lyrics, self.config['bing_lang_to']) else: - self._log.info(u'lyrics not found: {0}', item) + self._log.info('lyrics not found: {0}', item) fallback = self.config['fallback'].get() if fallback: lyrics = fallback @@ -872,12 +951,12 @@ class LyricsPlugin(plugins.BeetsPlugin): for backend in self.backends: lyrics = backend.fetch(artist, title) if lyrics: - self._log.debug(u'got lyrics from backend: {0}', + self._log.debug('got lyrics from backend: {0}', backend.__class__.__name__) return _scrape_strip_cruft(lyrics, True) def append_translation(self, text, to_lang): - import xml.etree.ElementTree as ET + from xml.etree import ElementTree if not self.bing_auth_token: self.bing_auth_token = self.get_bing_access_token() @@ -895,10 +974,11 @@ class LyricsPlugin(plugins.BeetsPlugin): self.bing_auth_token = None return self.append_translation(text, to_lang) return text - lines_translated = ET.fromstring(r.text.encode('utf-8')).text + lines_translated = ElementTree.fromstring( + r.text.encode('utf-8')).text # Use a translation mapping dict to build resulting lyrics translations = dict(zip(text_lines, lines_translated.split('|'))) result = '' for line in text.split('\n'): - result += '%s / %s\n' % (line, translations[line]) + result += '{} / {}\n'.format(line, 
translations[line]) return result diff --git a/libs/common/beetsplug/mbcollection.py b/libs/common/beetsplug/mbcollection.py index d99c386c..f4a0d161 100644 --- a/libs/common/beetsplug/mbcollection.py +++ b/libs/common/beetsplug/mbcollection.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright (c) 2011, Jeffrey Aylesworth # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.ui import Subcommand @@ -34,11 +32,11 @@ def mb_call(func, *args, **kwargs): try: return func(*args, **kwargs) except musicbrainzngs.AuthenticationError: - raise ui.UserError(u'authentication with MusicBrainz failed') + raise ui.UserError('authentication with MusicBrainz failed') except (musicbrainzngs.ResponseError, musicbrainzngs.NetworkError) as exc: - raise ui.UserError(u'MusicBrainz API error: {0}'.format(exc)) + raise ui.UserError(f'MusicBrainz API error: {exc}') except musicbrainzngs.UsageError: - raise ui.UserError(u'MusicBrainz credentials missing') + raise ui.UserError('MusicBrainz credentials missing') def submit_albums(collection_id, release_ids): @@ -55,7 +53,7 @@ def submit_albums(collection_id, release_ids): class MusicBrainzCollectionPlugin(BeetsPlugin): def __init__(self): - super(MusicBrainzCollectionPlugin, self).__init__() + super().__init__() config['musicbrainz']['pass'].redact = True musicbrainzngs.auth( config['musicbrainz']['user'].as_str(), @@ -63,7 +61,7 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): ) self.config.add({ 'auto': False, - 'collection': u'', + 'collection': '', 'remove': False, }) if self.config['auto']: @@ -72,18 +70,18 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): def _get_collection(self): collections = mb_call(musicbrainzngs.get_collections) if not collections['collection-list']: - raise 
ui.UserError(u'no collections exist for user') + raise ui.UserError('no collections exist for user') # Get all collection IDs, avoiding event collections collection_ids = [x['id'] for x in collections['collection-list']] if not collection_ids: - raise ui.UserError(u'No collection found.') + raise ui.UserError('No collection found.') # Check that the collection exists so we can present a nice error collection = self.config['collection'].as_str() if collection: if collection not in collection_ids: - raise ui.UserError(u'invalid collection ID: {}' + raise ui.UserError('invalid collection ID: {}' .format(collection)) return collection @@ -110,7 +108,7 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): def commands(self): mbupdate = Subcommand('mbupdate', - help=u'Update MusicBrainz collection') + help='Update MusicBrainz collection') mbupdate.parser.add_option('-r', '--remove', action='store_true', default=None, @@ -120,7 +118,7 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): return [mbupdate] def remove_missing(self, collection_id, lib_albums): - lib_ids = set([x.mb_albumid for x in lib_albums]) + lib_ids = {x.mb_albumid for x in lib_albums} albums_in_collection = self._get_albums_in_collection(collection_id) remove_me = list(set(albums_in_collection) - lib_ids) for i in range(0, len(remove_me), FETCH_CHUNK_SIZE): @@ -154,13 +152,13 @@ class MusicBrainzCollectionPlugin(BeetsPlugin): if re.match(UUID_REGEX, aid): album_ids.append(aid) else: - self._log.info(u'skipping invalid MBID: {0}', aid) + self._log.info('skipping invalid MBID: {0}', aid) # Submit to MusicBrainz. 
self._log.info( - u'Updating MusicBrainz collection {0}...', collection_id + 'Updating MusicBrainz collection {0}...', collection_id ) submit_albums(collection_id, album_ids) if remove_missing: self.remove_missing(collection_id, lib.albums()) - self._log.info(u'...MusicBrainz collection updated.') + self._log.info('...MusicBrainz collection updated.') diff --git a/libs/common/beetsplug/mbsubmit.py b/libs/common/beetsplug/mbsubmit.py index 02bd5f69..3ede0125 100644 --- a/libs/common/beetsplug/mbsubmit.py +++ b/libs/common/beetsplug/mbsubmit.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson and Diego Moreda. # @@ -19,11 +18,9 @@ This plugin allows the user to print track information in a format that is parseable by the MusicBrainz track parser [1]. Programmatic submitting is not implemented by MusicBrainz yet. -[1] http://wiki.musicbrainz.org/History:How_To_Parse_Track_Listings +[1] https://wiki.musicbrainz.org/History:How_To_Parse_Track_Listings """ -from __future__ import division, absolute_import, print_function - from beets.autotag import Recommendation from beets.plugins import BeetsPlugin @@ -33,10 +30,10 @@ from beetsplug.info import print_data class MBSubmitPlugin(BeetsPlugin): def __init__(self): - super(MBSubmitPlugin, self).__init__() + super().__init__() self.config.add({ - 'format': u'$track. $title - $artist ($length)', + 'format': '$track. 
$title - $artist ($length)', 'threshold': 'medium', }) @@ -53,7 +50,7 @@ class MBSubmitPlugin(BeetsPlugin): def before_choose_candidate_event(self, session, task): if task.rec <= self.threshold: - return [PromptChoice(u'p', u'Print tracks', self.print_tracks)] + return [PromptChoice('p', 'Print tracks', self.print_tracks)] def print_tracks(self, session, task): for i in sorted(task.items, key=lambda i: i.track): diff --git a/libs/common/beetsplug/mbsync.py b/libs/common/beetsplug/mbsync.py index 1764a177..26778830 100644 --- a/libs/common/beetsplug/mbsync.py +++ b/libs/common/beetsplug/mbsync.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Jakob Schnitzer. # @@ -15,47 +14,37 @@ """Update library's tags using MusicBrainz. """ -from __future__ import division, absolute_import, print_function -from beets.plugins import BeetsPlugin +from beets.plugins import BeetsPlugin, apply_item_changes from beets import autotag, library, ui, util from beets.autotag import hooks from collections import defaultdict +import re -def apply_item_changes(lib, item, move, pretend, write): - """Store, move and write the item according to the arguments. - """ - if not pretend: - # Move the item if it's in the library. 
- if move and lib.directory in util.ancestry(item.path): - item.move(with_album=False) - - if write: - item.try_write() - item.store() +MBID_REGEX = r"(\d|\w){8}-(\d|\w){4}-(\d|\w){4}-(\d|\w){4}-(\d|\w){12}" class MBSyncPlugin(BeetsPlugin): def __init__(self): - super(MBSyncPlugin, self).__init__() + super().__init__() def commands(self): cmd = ui.Subcommand('mbsync', - help=u'update metadata from musicbrainz') + help='update metadata from musicbrainz') cmd.parser.add_option( - u'-p', u'--pretend', action='store_true', - help=u'show all changes but do nothing') + '-p', '--pretend', action='store_true', + help='show all changes but do nothing') cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move files in the library directory") + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory") cmd.parser.add_option( - u'-M', u'--nomove', action='store_false', dest='move', - help=u"don't move files in library") + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library") cmd.parser.add_option( - u'-W', u'--nowrite', action='store_false', + '-W', '--nowrite', action='store_false', default=None, dest='write', - help=u"don't write updated metadata to files") + help="don't write updated metadata to files") cmd.parser.add_format_option() cmd.func = self.func return [cmd] @@ -75,17 +64,23 @@ class MBSyncPlugin(BeetsPlugin): """Retrieve and apply info from the autotagger for items matched by query. """ - for item in lib.items(query + [u'singleton:true']): + for item in lib.items(query + ['singleton:true']): item_formatted = format(item) if not item.mb_trackid: - self._log.info(u'Skipping singleton with no mb_trackid: {0}', + self._log.info('Skipping singleton with no mb_trackid: {0}', item_formatted) continue + # Do we have a valid MusicBrainz track ID? 
+ if not re.match(MBID_REGEX, item.mb_trackid): + self._log.info('Skipping singleton with invalid mb_trackid:' + + ' {0}', item_formatted) + continue + # Get the MusicBrainz recording info. track_info = hooks.track_for_mbid(item.mb_trackid) if not track_info: - self._log.info(u'Recording ID not found: {0} for track {0}', + self._log.info('Recording ID not found: {0} for track {0}', item.mb_trackid, item_formatted) continue @@ -103,16 +98,22 @@ class MBSyncPlugin(BeetsPlugin): for a in lib.albums(query): album_formatted = format(a) if not a.mb_albumid: - self._log.info(u'Skipping album with no mb_albumid: {0}', + self._log.info('Skipping album with no mb_albumid: {0}', album_formatted) continue items = list(a.items()) + # Do we have a valid MusicBrainz album ID? + if not re.match(MBID_REGEX, a.mb_albumid): + self._log.info('Skipping album with invalid mb_albumid: {0}', + album_formatted) + continue + # Get the MusicBrainz album information. album_info = hooks.album_for_mbid(a.mb_albumid) if not album_info: - self._log.info(u'Release ID {0} not found for album {1}', + self._log.info('Release ID {0} not found for album {1}', a.mb_albumid, album_formatted) continue @@ -120,7 +121,7 @@ class MBSyncPlugin(BeetsPlugin): # Map release track and recording MBIDs to their information. # Recordings can appear multiple times on a release, so each MBID # maps to a list of TrackInfo objects. - releasetrack_index = dict() + releasetrack_index = {} track_index = defaultdict(list) for track_info in album_info.tracks: releasetrack_index[track_info.release_track_id] = track_info @@ -148,7 +149,7 @@ class MBSyncPlugin(BeetsPlugin): break # Apply. - self._log.debug(u'applying changes to {}', album_formatted) + self._log.debug('applying changes to {}', album_formatted) with lib.transaction(): autotag.apply_metadata(album_info, mapping) changed = False @@ -173,5 +174,5 @@ class MBSyncPlugin(BeetsPlugin): # Move album art (and any inconsistent items). 
if move and lib.directory in util.ancestry(items[0].path): - self._log.debug(u'moving album {0}', album_formatted) + self._log.debug('moving album {0}', album_formatted) a.move() diff --git a/libs/common/beetsplug/metasync/__init__.py b/libs/common/beetsplug/metasync/__init__.py index 02f0b0f9..361071fb 100644 --- a/libs/common/beetsplug/metasync/__init__.py +++ b/libs/common/beetsplug/metasync/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Heinz Wiesinger. # @@ -16,15 +15,13 @@ """Synchronize information from music player libraries """ -from __future__ import division, absolute_import, print_function from abc import abstractmethod, ABCMeta from importlib import import_module -from beets.util.confit import ConfigValueError +from confuse import ConfigValueError from beets import ui from beets.plugins import BeetsPlugin -import six METASYNC_MODULE = 'beetsplug.metasync' @@ -36,7 +33,7 @@ SOURCES = { } -class MetaSource(six.with_metaclass(ABCMeta, object)): +class MetaSource(metaclass=ABCMeta): def __init__(self, config, log): self.item_types = {} self.config = config @@ -77,7 +74,7 @@ class MetaSyncPlugin(BeetsPlugin): item_types = load_item_types() def __init__(self): - super(MetaSyncPlugin, self).__init__() + super().__init__() def commands(self): cmd = ui.Subcommand('metasync', @@ -108,7 +105,7 @@ class MetaSyncPlugin(BeetsPlugin): # Avoid needlessly instantiating meta sources (can be expensive) if not items: - self._log.info(u'No items found matching query') + self._log.info('No items found matching query') return # Instantiate the meta sources @@ -116,18 +113,18 @@ class MetaSyncPlugin(BeetsPlugin): try: cls = META_SOURCES[player] except KeyError: - self._log.error(u'Unknown metadata source \'{0}\''.format( + self._log.error('Unknown metadata source \'{}\''.format( player)) try: meta_source_instances[player] = cls(self.config, self._log) except (ImportError, ConfigValueError) as e: - self._log.error(u'Failed 
to instantiate metadata source ' - u'\'{0}\': {1}'.format(player, e)) + self._log.error('Failed to instantiate metadata source ' + '\'{}\': {}'.format(player, e)) # Avoid needlessly iterating over items if not meta_source_instances: - self._log.error(u'No valid metadata sources found') + self._log.error('No valid metadata sources found') return # Sync the items with all of the meta sources diff --git a/libs/common/beetsplug/metasync/amarok.py b/libs/common/beetsplug/metasync/amarok.py index 0622fc17..a49eecc3 100644 --- a/libs/common/beetsplug/metasync/amarok.py +++ b/libs/common/beetsplug/metasync/amarok.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Heinz Wiesinger. # @@ -16,7 +15,6 @@ """Synchronize information from amarok's library via dbus """ -from __future__ import division, absolute_import, print_function from os.path import basename from datetime import datetime @@ -49,14 +47,14 @@ class Amarok(MetaSource): 'amarok_lastplayed': DateType(), } - queryXML = u' \ + query_xml = ' \ \ \ \ ' def __init__(self, config, log): - super(Amarok, self).__init__(config, log) + super().__init__(config, log) if not dbus: raise ImportError('failed to import dbus') @@ -72,7 +70,7 @@ class Amarok(MetaSource): # of the result set. So query for the filename and then try to match # the correct item from the results we get back results = self.collection.Query( - self.queryXML % quoteattr(basename(path)) + self.query_xml % quoteattr(basename(path)) ) for result in results: if result['xesam:url'] != path: diff --git a/libs/common/beetsplug/metasync/itunes.py b/libs/common/beetsplug/metasync/itunes.py index 17ab1637..e50a5713 100644 --- a/libs/common/beetsplug/metasync/itunes.py +++ b/libs/common/beetsplug/metasync/itunes.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Tom Jaspers. 
# @@ -16,7 +15,6 @@ """Synchronize information from iTunes's library """ -from __future__ import division, absolute_import, print_function from contextlib import contextmanager import os @@ -24,13 +22,13 @@ import shutil import tempfile import plistlib -from six.moves.urllib.parse import urlparse, unquote +from urllib.parse import urlparse, unquote from time import mktime from beets import util from beets.dbcore import types from beets.library import DateType -from beets.util.confit import ConfigValueError +from confuse import ConfigValueError from beetsplug.metasync import MetaSource @@ -63,15 +61,16 @@ def _norm_itunes_path(path): class Itunes(MetaSource): item_types = { - 'itunes_rating': types.INTEGER, # 0..100 scale - 'itunes_playcount': types.INTEGER, - 'itunes_skipcount': types.INTEGER, - 'itunes_lastplayed': DateType(), + 'itunes_rating': types.INTEGER, # 0..100 scale + 'itunes_playcount': types.INTEGER, + 'itunes_skipcount': types.INTEGER, + 'itunes_lastplayed': DateType(), 'itunes_lastskipped': DateType(), + 'itunes_dateadded': DateType(), } def __init__(self, config, log): - super(Itunes, self).__init__(config, log) + super().__init__(config, log) config.add({'itunes': { 'library': '~/Music/iTunes/iTunes Library.xml' @@ -82,19 +81,20 @@ class Itunes(MetaSource): try: self._log.debug( - u'loading iTunes library from {0}'.format(library_path)) + f'loading iTunes library from {library_path}') with create_temporary_copy(library_path) as library_copy: - raw_library = plistlib.readPlist(library_copy) - except IOError as e: - raise ConfigValueError(u'invalid iTunes library: ' + e.strerror) + with open(library_copy, 'rb') as library_copy_f: + raw_library = plistlib.load(library_copy_f) + except OSError as e: + raise ConfigValueError('invalid iTunes library: ' + e.strerror) except Exception: # It's likely the user configured their '.itl' library (<> xml) if os.path.splitext(library_path)[1].lower() != '.xml': - hint = u': please ensure that the configured path' \ 
- u' points to the .XML library' + hint = ': please ensure that the configured path' \ + ' points to the .XML library' else: hint = '' - raise ConfigValueError(u'invalid iTunes library' + hint) + raise ConfigValueError('invalid iTunes library' + hint) # Make the iTunes library queryable using the path self.collection = {_norm_itunes_path(track['Location']): track @@ -105,7 +105,7 @@ class Itunes(MetaSource): result = self.collection.get(util.bytestring_path(item.path).lower()) if not result: - self._log.warning(u'no iTunes match found for {0}'.format(item)) + self._log.warning(f'no iTunes match found for {item}') return item.itunes_rating = result.get('Rating') @@ -119,3 +119,7 @@ class Itunes(MetaSource): if result.get('Skip Date'): item.itunes_lastskipped = mktime( result.get('Skip Date').timetuple()) + + if result.get('Date Added'): + item.itunes_dateadded = mktime( + result.get('Date Added').timetuple()) diff --git a/libs/common/beetsplug/missing.py b/libs/common/beetsplug/missing.py index 8f0790f2..771978c1 100644 --- a/libs/common/beetsplug/missing.py +++ b/libs/common/beetsplug/missing.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Pedro Silva. # Copyright 2017, Quentin Young. @@ -16,7 +15,6 @@ """List missing tracks. 
""" -from __future__ import division, absolute_import, print_function import musicbrainzngs @@ -93,7 +91,7 @@ class MissingPlugin(BeetsPlugin): } def __init__(self): - super(MissingPlugin, self).__init__() + super().__init__() self.config.add({ 'count': False, @@ -107,14 +105,14 @@ class MissingPlugin(BeetsPlugin): help=__doc__, aliases=['miss']) self._command.parser.add_option( - u'-c', u'--count', dest='count', action='store_true', - help=u'count missing tracks per album') + '-c', '--count', dest='count', action='store_true', + help='count missing tracks per album') self._command.parser.add_option( - u'-t', u'--total', dest='total', action='store_true', - help=u'count total of missing tracks') + '-t', '--total', dest='total', action='store_true', + help='count total of missing tracks') self._command.parser.add_option( - u'-a', u'--album', dest='album', action='store_true', - help=u'show missing albums for artist instead of tracks') + '-a', '--album', dest='album', action='store_true', + help='show missing albums for artist instead of tracks') self._command.parser.add_format_option() def commands(self): @@ -173,10 +171,10 @@ class MissingPlugin(BeetsPlugin): # build dict mapping artist to list of all albums for artist, albums in albums_by_artist.items(): if artist[1] is None or artist[1] == "": - albs_no_mbid = [u"'" + a['album'] + u"'" for a in albums] + albs_no_mbid = ["'" + a['album'] + "'" for a in albums] self._log.info( - u"No musicbrainz ID for artist '{}' found in album(s) {}; " - "skipping", artist[0], u", ".join(albs_no_mbid) + "No musicbrainz ID for artist '{}' found in album(s) {}; " + "skipping", artist[0], ", ".join(albs_no_mbid) ) continue @@ -185,7 +183,7 @@ class MissingPlugin(BeetsPlugin): release_groups = resp['release-group-list'] except MusicBrainzError as err: self._log.info( - u"Couldn't fetch info for artist '{}' ({}) - '{}'", + "Couldn't fetch info for artist '{}' ({}) - '{}'", artist[0], artist[1], err ) continue @@ -207,7 +205,7 @@ class 
MissingPlugin(BeetsPlugin): missing_titles = {rg['title'] for rg in missing} for release_title in missing_titles: - print_(u"{} - {}".format(artist[0], release_title)) + print_("{} - {}".format(artist[0], release_title)) if total: print(total_missing) @@ -216,13 +214,13 @@ class MissingPlugin(BeetsPlugin): """Query MusicBrainz to determine items missing from `album`. """ item_mbids = [x.mb_trackid for x in album.items()] - if len([i for i in album.items()]) < album.albumtotal: + if len(list(album.items())) < album.albumtotal: # fetch missing items # TODO: Implement caching that without breaking other stuff album_info = hooks.album_for_mbid(album.mb_albumid) for track_info in getattr(album_info, 'tracks', []): if track_info.track_id not in item_mbids: item = _item(track_info, album_info, album.id) - self._log.debug(u'track {0} in album {1}', + self._log.debug('track {0} in album {1}', track_info.track_id, album_info.album_id) yield item diff --git a/libs/common/beetsplug/mpdstats.py b/libs/common/beetsplug/mpdstats.py index e5e82d48..96291cf4 100644 --- a/libs/common/beetsplug/mpdstats.py +++ b/libs/common/beetsplug/mpdstats.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Peter Schnebel and Johann Klähn. # @@ -13,11 +12,8 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. 
-from __future__ import division, absolute_import, print_function import mpd -import socket -import select import time import os @@ -45,14 +41,21 @@ def is_url(path): return path.split('://', 1)[0] in ['http', 'https'] -class MPDClientWrapper(object): +class MPDClientWrapper: def __init__(self, log): self._log = log - self.music_directory = ( - mpd_config['music_directory'].as_str()) + self.music_directory = mpd_config['music_directory'].as_str() + self.strip_path = mpd_config['strip_path'].as_str() - self.client = mpd.MPDClient(use_unicode=True) + # Ensure strip_path end with '/' + if not self.strip_path.endswith('/'): + self.strip_path += '/' + + self._log.debug('music_directory: {0}', self.music_directory) + self._log.debug('strip_path: {0}', self.strip_path) + + self.client = mpd.MPDClient() def connect(self): """Connect to the MPD. @@ -63,11 +66,11 @@ class MPDClientWrapper(object): if host[0] in ['/', '~']: host = os.path.expanduser(host) - self._log.info(u'connecting to {0}:{1}', host, port) + self._log.info('connecting to {0}:{1}', host, port) try: self.client.connect(host, port) - except socket.error as e: - raise ui.UserError(u'could not connect to MPD: {0}'.format(e)) + except OSError as e: + raise ui.UserError(f'could not connect to MPD: {e}') password = mpd_config['password'].as_str() if password: @@ -75,7 +78,7 @@ class MPDClientWrapper(object): self.client.password(password) except mpd.CommandError as e: raise ui.UserError( - u'could not authenticate to MPD: {0}'.format(e) + f'could not authenticate to MPD: {e}' ) def disconnect(self): @@ -90,12 +93,12 @@ class MPDClientWrapper(object): """ try: return getattr(self.client, command)() - except (select.error, mpd.ConnectionError) as err: - self._log.error(u'{0}', err) + except (OSError, mpd.ConnectionError) as err: + self._log.error('{0}', err) if retries <= 0: # if we exited without breaking, we couldn't reconnect in time :( - raise ui.UserError(u'communication with MPD server failed') + raise 
ui.UserError('communication with MPD server failed') time.sleep(RETRY_INTERVAL) @@ -107,18 +110,26 @@ class MPDClientWrapper(object): self.connect() return self.get(command, retries=retries - 1) - def playlist(self): - """Return the currently active playlist. Prefixes paths with the - music_directory, to get the absolute path. + def currentsong(self): + """Return the path to the currently playing song, along with its + songid. Prefixes paths with the music_directory, to get the absolute + path. + In some cases, we need to remove the local path from MPD server, + we replace 'strip_path' with ''. + `strip_path` defaults to ''. """ - result = {} - for entry in self.get('playlistinfo'): + result = None + entry = self.get('currentsong') + if 'file' in entry: if not is_url(entry['file']): - result[entry['id']] = os.path.join( - self.music_directory, entry['file']) + file = entry['file'] + if file.startswith(self.strip_path): + file = file[len(self.strip_path):] + result = os.path.join(self.music_directory, file) else: - result[entry['id']] = entry['file'] - return result + result = entry['file'] + self._log.debug('returning: {0}', result) + return result, entry.get('id') def status(self): """Return the current status of the MPD. @@ -132,7 +143,7 @@ class MPDClientWrapper(object): return self.get('idle') -class MPDStats(object): +class MPDStats: def __init__(self, lib, log): self.lib = lib self._log = log @@ -164,7 +175,7 @@ class MPDStats(object): if item: return item else: - self._log.info(u'item not found: {0}', displayable_path(path)) + self._log.info('item not found: {0}', displayable_path(path)) def update_item(self, item, attribute, value=None, increment=None): """Update the beets item. 
Set attribute to value or increment the value @@ -182,7 +193,7 @@ class MPDStats(object): item[attribute] = value item.store() - self._log.debug(u'updated: {0} = {1} [{2}]', + self._log.debug('updated: {0} = {1} [{2}]', attribute, item[attribute], displayable_path(item.path)) @@ -229,29 +240,31 @@ class MPDStats(object): """Updates the play count of a song. """ self.update_item(song['beets_item'], 'play_count', increment=1) - self._log.info(u'played {0}', displayable_path(song['path'])) + self._log.info('played {0}', displayable_path(song['path'])) def handle_skipped(self, song): """Updates the skip count of a song. """ self.update_item(song['beets_item'], 'skip_count', increment=1) - self._log.info(u'skipped {0}', displayable_path(song['path'])) + self._log.info('skipped {0}', displayable_path(song['path'])) def on_stop(self, status): - self._log.info(u'stop') + self._log.info('stop') - if self.now_playing: + # if the current song stays the same it means that we stopped on the + # current track and should not record a skip. 
+ if self.now_playing and self.now_playing['id'] != status.get('songid'): self.handle_song_change(self.now_playing) self.now_playing = None def on_pause(self, status): - self._log.info(u'pause') + self._log.info('pause') self.now_playing = None def on_play(self, status): - playlist = self.mpd.playlist() - path = playlist.get(status['songid']) + + path, songid = self.mpd.currentsong() if not path: return @@ -276,16 +289,17 @@ class MPDStats(object): self.handle_song_change(self.now_playing) if is_url(path): - self._log.info(u'playing stream {0}', displayable_path(path)) + self._log.info('playing stream {0}', displayable_path(path)) self.now_playing = None return - self._log.info(u'playing {0}', displayable_path(path)) + self._log.info('playing {0}', displayable_path(path)) self.now_playing = { - 'started': time.time(), - 'remaining': remaining, - 'path': path, + 'started': time.time(), + 'remaining': remaining, + 'path': path, + 'id': songid, 'beets_item': self.get_item(path), } @@ -305,7 +319,7 @@ class MPDStats(object): if handler: handler(status) else: - self._log.debug(u'unhandled status "{0}"', status) + self._log.debug('unhandled status "{0}"', status) events = self.mpd.events() @@ -313,37 +327,38 @@ class MPDStats(object): class MPDStatsPlugin(plugins.BeetsPlugin): item_types = { - 'play_count': types.INTEGER, - 'skip_count': types.INTEGER, + 'play_count': types.INTEGER, + 'skip_count': types.INTEGER, 'last_played': library.DateType(), - 'rating': types.FLOAT, + 'rating': types.FLOAT, } def __init__(self): - super(MPDStatsPlugin, self).__init__() + super().__init__() mpd_config.add({ 'music_directory': config['directory'].as_filename(), - 'rating': True, - 'rating_mix': 0.75, - 'host': os.environ.get('MPD_HOST', u'localhost'), - 'port': 6600, - 'password': u'', + 'strip_path': '', + 'rating': True, + 'rating_mix': 0.75, + 'host': os.environ.get('MPD_HOST', 'localhost'), + 'port': int(os.environ.get('MPD_PORT', 6600)), + 'password': '', }) 
mpd_config['password'].redact = True def commands(self): cmd = ui.Subcommand( 'mpdstats', - help=u'run a MPD client to gather play statistics') + help='run a MPD client to gather play statistics') cmd.parser.add_option( - u'--host', dest='host', type='string', - help=u'set the hostname of the server to connect to') + '--host', dest='host', type='string', + help='set the hostname of the server to connect to') cmd.parser.add_option( - u'--port', dest='port', type='int', - help=u'set the port of the MPD server to connect to') + '--port', dest='port', type='int', + help='set the port of the MPD server to connect to') cmd.parser.add_option( - u'--password', dest='password', type='string', - help=u'set the password of the MPD server to connect to') + '--password', dest='password', type='string', + help='set the password of the MPD server to connect to') def func(lib, opts, args): mpd_config.set_args(opts) diff --git a/libs/common/beetsplug/mpdupdate.py b/libs/common/beetsplug/mpdupdate.py index 6ecc9213..e5264e18 100644 --- a/libs/common/beetsplug/mpdupdate.py +++ b/libs/common/beetsplug/mpdupdate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -21,19 +20,17 @@ Put something like the following in your config.yaml to configure: port: 6600 password: seekrit """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin import os import socket from beets import config -import six # No need to introduce a dependency on an MPD library for such a # simple use case. Here's a simple socket abstraction to make things # easier. 
-class BufferedSocket(object): +class BufferedSocket: """Socket abstraction that allows reading by line.""" def __init__(self, host, port, sep=b'\n'): if host[0] in ['/', '~']: @@ -66,11 +63,11 @@ class BufferedSocket(object): class MPDUpdatePlugin(BeetsPlugin): def __init__(self): - super(MPDUpdatePlugin, self).__init__() + super().__init__() config['mpd'].add({ - 'host': os.environ.get('MPD_HOST', u'localhost'), - 'port': 6600, - 'password': u'', + 'host': os.environ.get('MPD_HOST', 'localhost'), + 'port': int(os.environ.get('MPD_PORT', 6600)), + 'password': '', }) config['mpd']['password'].redact = True @@ -100,21 +97,21 @@ class MPDUpdatePlugin(BeetsPlugin): try: s = BufferedSocket(host, port) - except socket.error as e: - self._log.warning(u'MPD connection failed: {0}', - six.text_type(e.strerror)) + except OSError as e: + self._log.warning('MPD connection failed: {0}', + str(e.strerror)) return resp = s.readline() if b'OK MPD' not in resp: - self._log.warning(u'MPD connection failed: {0!r}', resp) + self._log.warning('MPD connection failed: {0!r}', resp) return if password: s.send(b'password "%s"\n' % password.encode('utf8')) resp = s.readline() if b'OK' not in resp: - self._log.warning(u'Authentication failed: {0!r}', resp) + self._log.warning('Authentication failed: {0!r}', resp) s.send(b'close\n') s.close() return @@ -122,8 +119,8 @@ class MPDUpdatePlugin(BeetsPlugin): s.send(b'update\n') resp = s.readline() if b'updating_db' not in resp: - self._log.warning(u'Update failed: {0!r}', resp) + self._log.warning('Update failed: {0!r}', resp) s.send(b'close\n') s.close() - self._log.info(u'Database updated.') + self._log.info('Database updated.') diff --git a/libs/common/beetsplug/parentwork.py b/libs/common/beetsplug/parentwork.py new file mode 100644 index 00000000..75307b8f --- /dev/null +++ b/libs/common/beetsplug/parentwork.py @@ -0,0 +1,211 @@ +# This file is part of beets. +# Copyright 2017, Dorian Soergel. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Gets parent work, its disambiguation and id, composer, composer sort name +and work composition date +""" + + +from beets import ui +from beets.plugins import BeetsPlugin + +import musicbrainzngs + + +def direct_parent_id(mb_workid, work_date=None): + """Given a Musicbrainz work id, find the id one of the works the work is + part of and the first composition date it encounters. + """ + work_info = musicbrainzngs.get_work_by_id(mb_workid, + includes=["work-rels", + "artist-rels"]) + if 'artist-relation-list' in work_info['work'] and work_date is None: + for artist in work_info['work']['artist-relation-list']: + if artist['type'] == 'composer': + if 'end' in artist.keys(): + work_date = artist['end'] + + if 'work-relation-list' in work_info['work']: + for direct_parent in work_info['work']['work-relation-list']: + if direct_parent['type'] == 'parts' \ + and direct_parent.get('direction') == 'backward': + direct_id = direct_parent['work']['id'] + return direct_id, work_date + return None, work_date + + +def work_parent_id(mb_workid): + """Find the parent work id and composition date of a work given its id. 
+ """ + work_date = None + while True: + new_mb_workid, work_date = direct_parent_id(mb_workid, work_date) + if not new_mb_workid: + return mb_workid, work_date + mb_workid = new_mb_workid + return mb_workid, work_date + + +def find_parentwork_info(mb_workid): + """Get the MusicBrainz information dict about a parent work, including + the artist relations, and the composition date for a work's parent work. + """ + parent_id, work_date = work_parent_id(mb_workid) + work_info = musicbrainzngs.get_work_by_id(parent_id, + includes=["artist-rels"]) + return work_info, work_date + + +class ParentWorkPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'auto': False, + 'force': False, + }) + + if self.config['auto']: + self.import_stages = [self.imported] + + def commands(self): + + def func(lib, opts, args): + self.config.set_args(opts) + force_parent = self.config['force'].get(bool) + write = ui.should_write() + + for item in lib.items(ui.decargs(args)): + changed = self.find_work(item, force_parent) + if changed: + item.store() + if write: + item.try_write() + command = ui.Subcommand( + 'parentwork', + help='fetch parent works, composers and dates') + + command.parser.add_option( + '-f', '--force', dest='force', + action='store_true', default=None, + help='re-fetch when parent work is already present') + + command.func = func + return [command] + + def imported(self, session, task): + """Import hook for fetching parent works automatically. + """ + force_parent = self.config['force'].get(bool) + + for item in task.imported_items(): + self.find_work(item, force_parent) + item.store() + + def get_info(self, item, work_info): + """Given the parent work info dict, fetch parent_composer, + parent_composer_sort, parentwork, parentwork_disambig, mb_workid and + composer_ids. 
+ """ + + parent_composer = [] + parent_composer_sort = [] + parentwork_info = {} + + composer_exists = False + if 'artist-relation-list' in work_info['work']: + for artist in work_info['work']['artist-relation-list']: + if artist['type'] == 'composer': + composer_exists = True + parent_composer.append(artist['artist']['name']) + parent_composer_sort.append(artist['artist']['sort-name']) + if 'end' in artist.keys(): + parentwork_info["parentwork_date"] = artist['end'] + + parentwork_info['parent_composer'] = ', '.join(parent_composer) + parentwork_info['parent_composer_sort'] = ', '.join( + parent_composer_sort) + + if not composer_exists: + self._log.debug( + 'no composer for {}; add one at ' + 'https://musicbrainz.org/work/{}', + item, work_info['work']['id'], + ) + + parentwork_info['parentwork'] = work_info['work']['title'] + parentwork_info['mb_parentworkid'] = work_info['work']['id'] + + if 'disambiguation' in work_info['work']: + parentwork_info['parentwork_disambig'] = work_info[ + 'work']['disambiguation'] + + else: + parentwork_info['parentwork_disambig'] = None + + return parentwork_info + + def find_work(self, item, force): + """Finds the parent work of a recording and populates the tags + accordingly. + + The parent work is found recursively, by finding the direct parent + repeatedly until there are no more links in the chain. We return the + final, topmost work in the chain. + + Namely, the tags parentwork, parentwork_disambig, mb_parentworkid, + parent_composer, parent_composer_sort and work_date are populated. 
+ """ + + if not item.mb_workid: + self._log.info('No work for {}, \ +add one at https://musicbrainz.org/recording/{}', item, item.mb_trackid) + return + + hasparent = hasattr(item, 'parentwork') + work_changed = True + if hasattr(item, 'parentwork_workid_current'): + work_changed = item.parentwork_workid_current != item.mb_workid + if force or not hasparent or work_changed: + try: + work_info, work_date = find_parentwork_info(item.mb_workid) + except musicbrainzngs.musicbrainz.WebServiceError as e: + self._log.debug("error fetching work: {}", e) + return + parent_info = self.get_info(item, work_info) + parent_info['parentwork_workid_current'] = item.mb_workid + if 'parent_composer' in parent_info: + self._log.debug("Work fetched: {} - {}", + parent_info['parentwork'], + parent_info['parent_composer']) + else: + self._log.debug("Work fetched: {} - no parent composer", + parent_info['parentwork']) + + elif hasparent: + self._log.debug("{}: Work present, skipping", item) + return + + # apply all non-null values to the item + for key, value in parent_info.items(): + if value: + item[key] = value + + if work_date: + item['work_date'] = work_date + return ui.show_model_changes( + item, fields=['parentwork', 'parentwork_disambig', + 'mb_parentworkid', 'parent_composer', + 'parent_composer_sort', 'work_date', + 'parentwork_workid_current', 'parentwork_date']) diff --git a/libs/common/beetsplug/permissions.py b/libs/common/beetsplug/permissions.py index dd9e0984..f5aab056 100644 --- a/libs/common/beetsplug/permissions.py +++ b/libs/common/beetsplug/permissions.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- - -from __future__ import division, absolute_import, print_function - """Fixes file permissions after the file gets written on import. 
Put something like the following in your config.yaml to configure: @@ -13,7 +9,6 @@ import os from beets import config, util from beets.plugins import BeetsPlugin from beets.util import ancestry -import six def convert_perm(perm): @@ -21,8 +16,8 @@ def convert_perm(perm): Or, if `perm` is an integer, reinterpret it as an octal number that has been "misinterpreted" as decimal. """ - if isinstance(perm, six.integer_types): - perm = six.text_type(perm) + if isinstance(perm, int): + perm = str(perm) return int(perm, 8) @@ -40,11 +35,11 @@ def assert_permissions(path, permission, log): """ if not check_permissions(util.syspath(path), permission): log.warning( - u'could not set permissions on {}', + 'could not set permissions on {}', util.displayable_path(path), ) log.debug( - u'set permissions to {}, but permissions are now {}', + 'set permissions to {}, but permissions are now {}', permission, os.stat(util.syspath(path)).st_mode & 0o777, ) @@ -60,20 +55,39 @@ def dirs_in_library(library, item): class Permissions(BeetsPlugin): def __init__(self): - super(Permissions, self).__init__() + super().__init__() # Adding defaults. self.config.add({ - u'file': '644', - u'dir': '755', + 'file': '644', + 'dir': '755', }) self.register_listener('item_imported', self.fix) self.register_listener('album_imported', self.fix) + self.register_listener('art_set', self.fix_art) def fix(self, lib, item=None, album=None): """Fix the permissions for an imported Item or Album. """ + files = [] + dirs = set() + if item: + files.append(item.path) + dirs.update(dirs_in_library(lib.directory, item.path)) + elif album: + for album_item in album.items(): + files.append(album_item.path) + dirs.update(dirs_in_library(lib.directory, album_item.path)) + self.set_permissions(files=files, dirs=dirs) + + def fix_art(self, album): + """Fix the permission for Album art file. 
+ """ + if album.artpath: + self.set_permissions(files=[album.artpath]) + + def set_permissions(self, files=[], dirs=[]): # Get the configured permissions. The user can specify this either a # string (in YAML quotes) or, for convenience, as an integer so the # quotes can be omitted. In the latter case, we need to reinterpret the @@ -83,21 +97,10 @@ class Permissions(BeetsPlugin): file_perm = convert_perm(file_perm) dir_perm = convert_perm(dir_perm) - # Create chmod_queue. - file_chmod_queue = [] - if item: - file_chmod_queue.append(item.path) - elif album: - for album_item in album.items(): - file_chmod_queue.append(album_item.path) - - # A set of directories to change permissions for. - dir_chmod_queue = set() - - for path in file_chmod_queue: + for path in files: # Changing permissions on the destination file. self._log.debug( - u'setting file permissions on {}', + 'setting file permissions on {}', util.displayable_path(path), ) os.chmod(util.syspath(path), file_perm) @@ -105,16 +108,11 @@ class Permissions(BeetsPlugin): # Checks if the destination path has the permissions configured. assert_permissions(path, file_perm, self._log) - # Adding directories to the directory chmod queue. - dir_chmod_queue.update( - dirs_in_library(lib.directory, - path)) - # Change permissions for the directories. - for path in dir_chmod_queue: - # Chaning permissions on the destination directory. + for path in dirs: + # Changing permissions on the destination directory. self._log.debug( - u'setting directory permissions on {}', + 'setting directory permissions on {}', util.displayable_path(path), ) os.chmod(util.syspath(path), dir_perm) diff --git a/libs/common/beetsplug/play.py b/libs/common/beetsplug/play.py index 4d32a357..f4233490 100644 --- a/libs/common/beetsplug/play.py +++ b/libs/common/beetsplug/play.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. 
# Copyright 2016, David Hamp-Gonsalves # @@ -15,7 +14,6 @@ """Send the results of a query to the configured music player as a playlist. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.ui import Subcommand @@ -26,6 +24,7 @@ from beets import util from os.path import relpath from tempfile import NamedTemporaryFile import subprocess +import shlex # Indicate where arguments should be inserted into the command string. # If this is missing, they're placed at the end. @@ -39,25 +38,25 @@ def play(command_str, selection, paths, open_args, log, item_type='track', """ # Print number of tracks or albums to be played, log command to be run. item_type += 's' if len(selection) > 1 else '' - ui.print_(u'Playing {0} {1}.'.format(len(selection), item_type)) - log.debug(u'executing command: {} {!r}', command_str, open_args) + ui.print_('Playing {} {}.'.format(len(selection), item_type)) + log.debug('executing command: {} {!r}', command_str, open_args) try: if keep_open: - command = util.shlex_split(command_str) + command = shlex.split(command_str) command = command + open_args subprocess.call(command) else: util.interactive_open(open_args, command_str) except OSError as exc: raise ui.UserError( - "Could not play the query: {0}".format(exc)) + f"Could not play the query: {exc}") class PlayPlugin(BeetsPlugin): def __init__(self): - super(PlayPlugin, self).__init__() + super().__init__() config['play'].add({ 'command': None, @@ -65,6 +64,7 @@ class PlayPlugin(BeetsPlugin): 'relative_to': None, 'raw': False, 'warning_threshold': 100, + 'bom': False, }) self.register_listener('before_choose_candidate', @@ -73,18 +73,18 @@ class PlayPlugin(BeetsPlugin): def commands(self): play_command = Subcommand( 'play', - help=u'send music to a player as a playlist' + help='send music to a player as a playlist' ) play_command.parser.add_album_option() play_command.parser.add_option( - u'-A', u'--args', + '-A', '--args', 
action='store', - help=u'add additional arguments to the command', + help='add additional arguments to the command', ) play_command.parser.add_option( - u'-y', u'--yes', + '-y', '--yes', action="store_true", - help=u'skip the warning threshold', + help='skip the warning threshold', ) play_command.func = self._play_command return [play_command] @@ -123,7 +123,7 @@ class PlayPlugin(BeetsPlugin): if not selection: ui.print_(ui.colorize('text_warning', - u'No {0} to play.'.format(item_type))) + f'No {item_type} to play.')) return open_args = self._playlist_or_paths(paths) @@ -147,7 +147,7 @@ class PlayPlugin(BeetsPlugin): if ARGS_MARKER in command_str: return command_str.replace(ARGS_MARKER, args) else: - return u"{} {}".format(command_str, args) + return f"{command_str} {args}" else: # Don't include the marker in the command. return command_str.replace(" " + ARGS_MARKER, "") @@ -174,10 +174,10 @@ class PlayPlugin(BeetsPlugin): ui.print_(ui.colorize( 'text_warning', - u'You are about to queue {0} {1}.'.format( + 'You are about to queue {} {}.'.format( len(selection), item_type))) - if ui.input_options((u'Continue', u'Abort')) == 'a': + if ui.input_options(('Continue', 'Abort')) == 'a': return True return False @@ -185,7 +185,12 @@ class PlayPlugin(BeetsPlugin): def _create_tmp_playlist(self, paths_list): """Create a temporary .m3u file. Return the filename. """ + utf8_bom = config['play']['bom'].get(bool) m3u = NamedTemporaryFile('wb', suffix='.m3u', delete=False) + + if utf8_bom: + m3u.write(b'\xEF\xBB\xBF') + for item in paths_list: m3u.write(item + b'\n') m3u.close() diff --git a/libs/common/beetsplug/playlist.py b/libs/common/beetsplug/playlist.py new file mode 100644 index 00000000..265b8bad --- /dev/null +++ b/libs/common/beetsplug/playlist.py @@ -0,0 +1,185 @@ +# This file is part of beets. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import os +import fnmatch +import tempfile +import beets +from beets.util import path_as_posix + + +class PlaylistQuery(beets.dbcore.Query): + """Matches files listed by a playlist file. + """ + def __init__(self, pattern): + self.pattern = pattern + config = beets.config['playlist'] + + # Get the full path to the playlist + playlist_paths = ( + pattern, + os.path.abspath(os.path.join( + config['playlist_dir'].as_filename(), + f'{pattern}.m3u', + )), + ) + + self.paths = [] + for playlist_path in playlist_paths: + if not fnmatch.fnmatch(playlist_path, '*.[mM]3[uU]'): + # This is not am M3U playlist, skip this candidate + continue + + try: + f = open(beets.util.syspath(playlist_path), mode='rb') + except OSError: + continue + + if config['relative_to'].get() == 'library': + relative_to = beets.config['directory'].as_filename() + elif config['relative_to'].get() == 'playlist': + relative_to = os.path.dirname(playlist_path) + else: + relative_to = config['relative_to'].as_filename() + relative_to = beets.util.bytestring_path(relative_to) + + for line in f: + if line[0] == '#': + # ignore comments, and extm3u extension + continue + + self.paths.append(beets.util.normpath( + os.path.join(relative_to, line.rstrip()) + )) + f.close() + break + + def col_clause(self): + if not self.paths: + # Playlist is empty + return '0', () + clause = 'path IN ({})'.format(', '.join('?' 
for path in self.paths)) + return clause, (beets.library.BLOB_TYPE(p) for p in self.paths) + + def match(self, item): + return item.path in self.paths + + +class PlaylistPlugin(beets.plugins.BeetsPlugin): + item_queries = {'playlist': PlaylistQuery} + + def __init__(self): + super().__init__() + self.config.add({ + 'auto': False, + 'playlist_dir': '.', + 'relative_to': 'library', + 'forward_slash': False, + }) + + self.playlist_dir = self.config['playlist_dir'].as_filename() + self.changes = {} + + if self.config['relative_to'].get() == 'library': + self.relative_to = beets.util.bytestring_path( + beets.config['directory'].as_filename()) + elif self.config['relative_to'].get() != 'playlist': + self.relative_to = beets.util.bytestring_path( + self.config['relative_to'].as_filename()) + else: + self.relative_to = None + + if self.config['auto']: + self.register_listener('item_moved', self.item_moved) + self.register_listener('item_removed', self.item_removed) + self.register_listener('cli_exit', self.cli_exit) + + def item_moved(self, item, source, destination): + self.changes[source] = destination + + def item_removed(self, item): + if not os.path.exists(beets.util.syspath(item.path)): + self.changes[item.path] = None + + def cli_exit(self, lib): + for playlist in self.find_playlists(): + self._log.info(f'Updating playlist: {playlist}') + base_dir = beets.util.bytestring_path( + self.relative_to if self.relative_to + else os.path.dirname(playlist) + ) + + try: + self.update_playlist(playlist, base_dir) + except beets.util.FilesystemError: + self._log.error('Failed to update playlist: {}'.format( + beets.util.displayable_path(playlist))) + + def find_playlists(self): + """Find M3U playlists in the playlist directory.""" + try: + dir_contents = os.listdir(beets.util.syspath(self.playlist_dir)) + except OSError: + self._log.warning('Unable to open playlist directory {}'.format( + beets.util.displayable_path(self.playlist_dir))) + return + + for filename in 
dir_contents: + if fnmatch.fnmatch(filename, '*.[mM]3[uU]'): + yield os.path.join(self.playlist_dir, filename) + + def update_playlist(self, filename, base_dir): + """Find M3U playlists in the specified directory.""" + changes = 0 + deletions = 0 + + with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as tempfp: + new_playlist = tempfp.name + with open(filename, mode='rb') as fp: + for line in fp: + original_path = line.rstrip(b'\r\n') + + # Ensure that path from playlist is absolute + is_relative = not os.path.isabs(line) + if is_relative: + lookup = os.path.join(base_dir, original_path) + else: + lookup = original_path + + try: + new_path = self.changes[beets.util.normpath(lookup)] + except KeyError: + if self.config['forward_slash']: + line = path_as_posix(line) + tempfp.write(line) + else: + if new_path is None: + # Item has been deleted + deletions += 1 + continue + + changes += 1 + if is_relative: + new_path = os.path.relpath(new_path, base_dir) + line = line.replace(original_path, new_path) + if self.config['forward_slash']: + line = path_as_posix(line) + tempfp.write(line) + + if changes or deletions: + self._log.info( + 'Updated playlist {} ({} changes, {} deletions)'.format( + filename, changes, deletions)) + beets.util.copy(new_playlist, filename, replace=True) + beets.util.remove(new_playlist) diff --git a/libs/common/beetsplug/plexupdate.py b/libs/common/beetsplug/plexupdate.py index 17fd8208..2261a55f 100644 --- a/libs/common/beetsplug/plexupdate.py +++ b/libs/common/beetsplug/plexupdate.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Updates an Plex library whenever the beets library is changed. Plex Home users enter the Plex Token to enable updating. 
@@ -9,42 +7,51 @@ Put something like the following in your config.yaml to configure: port: 32400 token: token """ -from __future__ import division, absolute_import, print_function import requests -import xml.etree.ElementTree as ET -from six.moves.urllib.parse import urljoin, urlencode +from xml.etree import ElementTree +from urllib.parse import urljoin, urlencode from beets import config from beets.plugins import BeetsPlugin -def get_music_section(host, port, token, library_name): +def get_music_section(host, port, token, library_name, secure, + ignore_cert_errors): """Getting the section key for the music library in Plex. """ api_endpoint = append_token('library/sections', token) - url = urljoin('http://{0}:{1}'.format(host, port), api_endpoint) + url = urljoin('{}://{}:{}'.format(get_protocol(secure), host, + port), api_endpoint) # Sends request. - r = requests.get(url) + r = requests.get(url, verify=not ignore_cert_errors) # Parse xml tree and extract music section key. - tree = ET.fromstring(r.content) + tree = ElementTree.fromstring(r.content) for child in tree.findall('Directory'): if child.get('title') == library_name: return child.get('key') -def update_plex(host, port, token, library_name): +def update_plex(host, port, token, library_name, secure, + ignore_cert_errors): + """Ignore certificate errors if configured to. + """ + if ignore_cert_errors: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) """Sends request to the Plex api to start a library refresh. """ # Getting section key and build url. 
- section_key = get_music_section(host, port, token, library_name) - api_endpoint = 'library/sections/{0}/refresh'.format(section_key) + section_key = get_music_section(host, port, token, library_name, + secure, ignore_cert_errors) + api_endpoint = f'library/sections/{section_key}/refresh' api_endpoint = append_token(api_endpoint, token) - url = urljoin('http://{0}:{1}'.format(host, port), api_endpoint) + url = urljoin('{}://{}:{}'.format(get_protocol(secure), host, + port), api_endpoint) # Sends request and returns requests object. - r = requests.get(url) + r = requests.get(url, verify=not ignore_cert_errors) return r @@ -56,16 +63,25 @@ def append_token(url, token): return url +def get_protocol(secure): + if secure: + return 'https' + else: + return 'http' + + class PlexUpdate(BeetsPlugin): def __init__(self): - super(PlexUpdate, self).__init__() + super().__init__() # Adding defaults. config['plex'].add({ - u'host': u'localhost', - u'port': 32400, - u'token': u'', - u'library_name': u'Music'}) + 'host': 'localhost', + 'port': 32400, + 'token': '', + 'library_name': 'Music', + 'secure': False, + 'ignore_cert_errors': False}) config['plex']['token'].redact = True self.register_listener('database_change', self.listen_for_db_change) @@ -77,7 +93,7 @@ class PlexUpdate(BeetsPlugin): def update(self, lib): """When the client exists try to send refresh request to Plex server. """ - self._log.info(u'Updating Plex library...') + self._log.info('Updating Plex library...') # Try to send update request. try: @@ -85,8 +101,10 @@ class PlexUpdate(BeetsPlugin): config['plex']['host'].get(), config['plex']['port'].get(), config['plex']['token'].get(), - config['plex']['library_name'].get()) - self._log.info(u'... started.') + config['plex']['library_name'].get(), + config['plex']['secure'].get(bool), + config['plex']['ignore_cert_errors'].get(bool)) + self._log.info('... 
started.') except requests.exceptions.RequestException: - self._log.warning(u'Update failed.') + self._log.warning('Update failed.') diff --git a/libs/common/beetsplug/random.py b/libs/common/beetsplug/random.py index 65caaf90..ea9b7b98 100644 --- a/libs/common/beetsplug/random.py +++ b/libs/common/beetsplug/random.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Philippe Mongeau. # @@ -15,101 +14,10 @@ """Get a random song or album from the library. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.ui import Subcommand, decargs, print_ -import random -from operator import attrgetter -from itertools import groupby - - -def _length(obj, album): - """Get the duration of an item or album. - """ - if album: - return sum(i.length for i in obj.items()) - else: - return obj.length - - -def _equal_chance_permutation(objs, field='albumartist'): - """Generate (lazily) a permutation of the objects where every group - with equal values for `field` have an equal chance of appearing in - any given position. - """ - # Group the objects by artist so we can sample from them. - key = attrgetter(field) - objs.sort(key=key) - objs_by_artists = {} - for artist, v in groupby(objs, key): - objs_by_artists[artist] = list(v) - - # While we still have artists with music to choose from, pick one - # randomly and pick a track from that artist. - while objs_by_artists: - # Choose an artist and an object for that artist, removing - # this choice from the pool. - artist = random.choice(list(objs_by_artists.keys())) - objs_from_artist = objs_by_artists[artist] - i = random.randint(0, len(objs_from_artist) - 1) - yield objs_from_artist.pop(i) - - # Remove the artist if we've used up all of its objects. - if not objs_from_artist: - del objs_by_artists[artist] - - -def _take(iter, num): - """Return a list containing the first `num` values in `iter` (or - fewer, if the iterable ends early). 
- """ - out = [] - for val in iter: - out.append(val) - num -= 1 - if num <= 0: - break - return out - - -def _take_time(iter, secs, album): - """Return a list containing the first values in `iter`, which should - be Item or Album objects, that add up to the given amount of time in - seconds. - """ - out = [] - total_time = 0.0 - for obj in iter: - length = _length(obj, album) - if total_time + length <= secs: - out.append(obj) - total_time += length - return out - - -def random_objs(objs, album, number=1, time=None, equal_chance=False): - """Get a random subset of the provided `objs`. - - If `number` is provided, produce that many matches. Otherwise, if - `time` is provided, instead select a list whose total time is close - to that number of minutes. If `equal_chance` is true, give each - artist an equal chance of being included so that artists with more - songs are not represented disproportionately. - """ - # Permute the objects either in a straightforward way or an - # artist-balanced way. - if equal_chance: - perm = _equal_chance_permutation(objs) - else: - perm = objs - random.shuffle(perm) # N.B. This shuffles the original list. - - # Select objects by time our count. 
- if time: - return _take_time(perm, time * 60, album) - else: - return _take(perm, number) +from beets.random import random_objs def random_func(lib, opts, args): @@ -130,16 +38,16 @@ def random_func(lib, opts, args): random_cmd = Subcommand('random', - help=u'choose a random track or album') + help='choose a random track or album') random_cmd.parser.add_option( - u'-n', u'--number', action='store', type="int", - help=u'number of objects to choose', default=1) + '-n', '--number', action='store', type="int", + help='number of objects to choose', default=1) random_cmd.parser.add_option( - u'-e', u'--equal-chance', action='store_true', - help=u'each artist has the same chance') + '-e', '--equal-chance', action='store_true', + help='each artist has the same chance') random_cmd.parser.add_option( - u'-t', u'--time', action='store', type="float", - help=u'total length in minutes of objects to choose') + '-t', '--time', action='store', type="float", + help='total length in minutes of objects to choose') random_cmd.parser.add_all_common_options() random_cmd.func = random_func diff --git a/libs/common/beetsplug/replaygain.py b/libs/common/beetsplug/replaygain.py index a7eb81b5..b6297d93 100644 --- a/libs/common/beetsplug/replaygain.py +++ b/libs/common/beetsplug/replaygain.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Fabrice Laporte, Yevgeny Bezman, and Adrian Sampson. # @@ -13,20 +12,23 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. 
-from __future__ import division, absolute_import, print_function -import subprocess -import os import collections +import enum +import math +import os +import signal +import subprocess import sys import warnings -import xml.parsers.expat -from six.moves import zip +from multiprocessing.pool import ThreadPool, RUN +from six.moves import queue +from threading import Thread, Event from beets import ui from beets.plugins import BeetsPlugin -from beets.util import (syspath, command_output, bytestring_path, - displayable_path, py3_path) +from beets.util import (syspath, command_output, displayable_path, + py3_path, cpu_count) # Utilities. @@ -47,238 +49,342 @@ class FatalGstreamerPluginReplayGainError(FatalReplayGainError): loading the required plugins.""" -def call(args): +def call(args, **kwargs): """Execute the command and return its output or raise a ReplayGainError on failure. """ try: - return command_output(args) + return command_output(args, **kwargs) except subprocess.CalledProcessError as e: raise ReplayGainError( - u"{0} exited with status {1}".format(args[0], e.returncode) + "{} exited with status {}".format(args[0], e.returncode) ) except UnicodeEncodeError: # Due to a bug in Python 2's subprocess on Windows, Unicode # filenames can fail to encode on that platform. See: # https://github.com/google-code-export/beets/issues/499 - raise ReplayGainError(u"argument encoding failed") + raise ReplayGainError("argument encoding failed") + + +def after_version(version_a, version_b): + return tuple(int(s) for s in version_a.split('.')) \ + >= tuple(int(s) for s in version_b.split('.')) + + +def db_to_lufs(db): + """Convert db to LUFS. + + According to https://wiki.hydrogenaud.io/index.php?title= + ReplayGain_2.0_specification#Reference_level + """ + return db - 107 + + +def lufs_to_db(db): + """Convert LUFS to db. 
+ + According to https://wiki.hydrogenaud.io/index.php?title= + ReplayGain_2.0_specification#Reference_level + """ + return db + 107 # Backend base and plumbing classes. +# gain: in LU to reference level +# peak: part of full scale (FS is 1.0) Gain = collections.namedtuple("Gain", "gain peak") +# album_gain: Gain object +# track_gains: list of Gain objects AlbumGain = collections.namedtuple("AlbumGain", "album_gain track_gains") -class Backend(object): +class Peak(enum.Enum): + none = 0 + true = 1 + sample = 2 + + +class Backend: """An abstract class representing engine for calculating RG values. """ + do_parallel = False + def __init__(self, config, log): """Initialize the backend with the configuration view for the plugin. """ self._log = log - def compute_track_gain(self, items): - raise NotImplementedError() - - def compute_album_gain(self, album): - # TODO: implement album gain in terms of track gain of the - # individual tracks which can be used for any backend. - raise NotImplementedError() - - -# bsg1770gain backend -class Bs1770gainBackend(Backend): - """bs1770gain is a loudness scanner compliant with ITU-R BS.1770 and - its flavors EBU R128, ATSC A/85 and Replaygain 2.0. - """ - - def __init__(self, config, log): - super(Bs1770gainBackend, self).__init__(config, log) - config.add({ - 'chunk_at': 5000, - 'method': 'replaygain', - }) - self.chunk_at = config['chunk_at'].as_number() - self.method = '--' + config['method'].as_str() - - cmd = 'bs1770gain' - try: - call([cmd, self.method]) - self.command = cmd - except OSError: - raise FatalReplayGainError( - u'Is bs1770gain installed? Is your method in config correct?' - ) - if not self.command: - raise FatalReplayGainError( - u'no replaygain command found: install bs1770gain' - ) - - def compute_track_gain(self, items): + def compute_track_gain(self, items, target_level, peak): """Computes the track gain of the given tracks, returns a list - of TrackGain objects. + of Gain objects. 
""" + raise NotImplementedError() - output = self.compute_gain(items, False) - return output - - def compute_album_gain(self, album): + def compute_album_gain(self, items, target_level, peak): """Computes the album gain of the given album, returns an AlbumGain object. """ - # TODO: What should be done when not all tracks in the album are - # supported? + raise NotImplementedError() - supported_items = album.items() - output = self.compute_gain(supported_items, True) - if not output: - raise ReplayGainError(u'no output from bs1770gain') - return AlbumGain(output[-1], output[:-1]) +# ffmpeg backend +class FfmpegBackend(Backend): + """A replaygain backend using ffmpeg's ebur128 filter. + """ - def isplitter(self, items, chunk_at): - """Break an iterable into chunks of at most size `chunk_at`, - generating lists for each chunk. - """ - iterable = iter(items) - while True: - result = [] - for i in range(chunk_at): - try: - a = next(iterable) - except StopIteration: - break - else: - result.append(a) - if result: - yield result - else: - break + do_parallel = True - def compute_gain(self, items, is_album): - """Computes the track or album gain of a list of items, returns - a list of TrackGain objects. - When computing album gain, the last TrackGain object returned is - the album gain - """ + def __init__(self, config, log): + super().__init__(config, log) + self._ffmpeg_path = "ffmpeg" - if len(items) == 0: - return [] - - albumgaintot = 0.0 - albumpeaktot = 0.0 - returnchunks = [] - - # In the case of very large sets of music, we break the tracks - # into smaller chunks and process them one at a time. This - # avoids running out of memory. 
- if len(items) > self.chunk_at: - i = 0 - for chunk in self.isplitter(items, self.chunk_at): - i += 1 - returnchunk = self.compute_chunk_gain(chunk, is_album) - albumgaintot += returnchunk[-1].gain - albumpeaktot += returnchunk[-1].peak - returnchunks = returnchunks + returnchunk[0:-1] - returnchunks.append(Gain(albumgaintot / i, albumpeaktot / i)) - return returnchunks - else: - return self.compute_chunk_gain(items, is_album) - - def compute_chunk_gain(self, items, is_album): - """Compute ReplayGain values and return a list of results - dictionaries as given by `parse_tool_output`. - """ - # Construct shell command. - cmd = [self.command] - cmd += [self.method] - cmd += ['--xml', '-p'] - - # Workaround for Windows: the underlying tool fails on paths - # with the \\?\ prefix, so we don't use it here. This - # prevents the backend from working with long paths. - args = cmd + [syspath(i.path, prefix=False) for i in items] - path_list = [i.path for i in items] - - # Invoke the command. - self._log.debug( - u'executing {0}', u' '.join(map(displayable_path, args)) - ) - output = call(args) - - self._log.debug(u'analysis finished: {0}', output) - results = self.parse_tool_output(output, path_list, is_album) - self._log.debug(u'{0} items, {1} results', len(items), len(results)) - return results - - def parse_tool_output(self, text, path_list, is_album): - """Given the output from bs1770gain, parse the text and - return a list of dictionaries - containing information about each analyzed file. 
- """ - per_file_gain = {} - album_gain = {} # mutable variable so it can be set from handlers - parser = xml.parsers.expat.ParserCreate(encoding='utf-8') - state = {'file': None, 'gain': None, 'peak': None} - - def start_element_handler(name, attrs): - if name == u'track': - state['file'] = bytestring_path(attrs[u'file']) - if state['file'] in per_file_gain: - raise ReplayGainError( - u'duplicate filename in bs1770gain output') - elif name == u'integrated': - state['gain'] = float(attrs[u'lu']) - elif name == u'sample-peak': - state['peak'] = float(attrs[u'factor']) - - def end_element_handler(name): - if name == u'track': - if state['gain'] is None or state['peak'] is None: - raise ReplayGainError(u'could not parse gain or peak from ' - 'the output of bs1770gain') - per_file_gain[state['file']] = Gain(state['gain'], - state['peak']) - state['gain'] = state['peak'] = None - elif name == u'summary': - if state['gain'] is None or state['peak'] is None: - raise ReplayGainError(u'could not parse gain or peak from ' - 'the output of bs1770gain') - album_gain["album"] = Gain(state['gain'], state['peak']) - state['gain'] = state['peak'] = None - parser.StartElementHandler = start_element_handler - parser.EndElementHandler = end_element_handler - parser.Parse(text, True) - - if len(per_file_gain) != len(path_list): - raise ReplayGainError( - u'the number of results returned by bs1770gain does not match ' - 'the number of files passed to it') - - # bs1770gain does not return the analysis results in the order that - # files are passed on the command line, because it is sorting the files - # internally. We must recover the order from the filenames themselves. 
+ # check that ffmpeg is installed try: - out = [per_file_gain[os.path.basename(p)] for p in path_list] - except KeyError: + ffmpeg_version_out = call([self._ffmpeg_path, "-version"]) + except OSError: + raise FatalReplayGainError( + f"could not find ffmpeg at {self._ffmpeg_path}" + ) + incompatible_ffmpeg = True + for line in ffmpeg_version_out.stdout.splitlines(): + if line.startswith(b"configuration:"): + if b"--enable-libebur128" in line: + incompatible_ffmpeg = False + if line.startswith(b"libavfilter"): + version = line.split(b" ", 1)[1].split(b"/", 1)[0].split(b".") + version = tuple(map(int, version)) + if version >= (6, 67, 100): + incompatible_ffmpeg = False + if incompatible_ffmpeg: + raise FatalReplayGainError( + "Installed FFmpeg version does not support ReplayGain." + "calculation. Either libavfilter version 6.67.100 or above or" + "the --enable-libebur128 configuration option is required." + ) + + def compute_track_gain(self, items, target_level, peak): + """Computes the track gain of the given tracks, returns a list + of Gain objects (the track gains). + """ + gains = [] + for item in items: + gains.append( + self._analyse_item( + item, + target_level, + peak, + count_blocks=False, + )[0] # take only the gain, discarding number of gating blocks + ) + return gains + + def compute_album_gain(self, items, target_level, peak): + """Computes the album gain of the given album, returns an + AlbumGain object. 
+ """ + target_level_lufs = db_to_lufs(target_level) + + # analyse tracks + # list of track Gain objects + track_gains = [] + # maximum peak + album_peak = 0 + # sum of BS.1770 gating block powers + sum_powers = 0 + # total number of BS.1770 gating blocks + n_blocks = 0 + + for item in items: + track_gain, track_n_blocks = self._analyse_item( + item, target_level, peak + ) + track_gains.append(track_gain) + + # album peak is maximum track peak + album_peak = max(album_peak, track_gain.peak) + + # prepare album_gain calculation + # total number of blocks is sum of track blocks + n_blocks += track_n_blocks + + # convert `LU to target_level` -> LUFS + track_loudness = target_level_lufs - track_gain.gain + # This reverses ITU-R BS.1770-4 p. 6 equation (5) to convert + # from loudness to power. The result is the average gating + # block power. + track_power = 10**((track_loudness + 0.691) / 10) + + # Weight that average power by the number of gating blocks to + # get the sum of all their powers. Add that to the sum of all + # block powers in this album. + sum_powers += track_power * track_n_blocks + + # calculate album gain + if n_blocks > 0: + # compare ITU-R BS.1770-4 p. 6 equation (5) + # Album gain is the replaygain of the concatenation of all tracks. + album_gain = -0.691 + 10 * math.log10(sum_powers / n_blocks) + else: + album_gain = -70 + # convert LUFS -> `LU to target_level` + album_gain = target_level_lufs - album_gain + + self._log.debug( + "{}: gain {} LU, peak {}" + .format(items, album_gain, album_peak) + ) + + return AlbumGain(Gain(album_gain, album_peak), track_gains) + + def _construct_cmd(self, item, peak_method): + """Construct the shell command to analyse items.""" + return [ + self._ffmpeg_path, + "-nostats", + "-hide_banner", + "-i", + item.path, + "-map", + "a:0", + "-filter", + f"ebur128=peak={peak_method}", + "-f", + "null", + "-", + ] + + def _analyse_item(self, item, target_level, peak, count_blocks=True): + """Analyse item. 
Return a pair of a Gain object and the number + of gating blocks above the threshold. + + If `count_blocks` is False, the number of gating blocks returned + will be 0. + """ + target_level_lufs = db_to_lufs(target_level) + peak_method = peak.name + + # call ffmpeg + self._log.debug(f"analyzing {item}") + cmd = self._construct_cmd(item, peak_method) + self._log.debug( + 'executing {0}', ' '.join(map(displayable_path, cmd)) + ) + output = call(cmd).stderr.splitlines() + + # parse output + + if peak == Peak.none: + peak = 0 + else: + line_peak = self._find_line( + output, + f" {peak_method.capitalize()} peak:".encode(), + start_line=len(output) - 1, step_size=-1, + ) + peak = self._parse_float( + output[self._find_line( + output, b" Peak:", + line_peak, + )] + ) + # convert TPFS -> part of FS + peak = 10**(peak / 20) + + line_integrated_loudness = self._find_line( + output, b" Integrated loudness:", + start_line=len(output) - 1, step_size=-1, + ) + gain = self._parse_float( + output[self._find_line( + output, b" I:", + line_integrated_loudness, + )] + ) + # convert LUFS -> LU from target level + gain = target_level_lufs - gain + + # count BS.1770 gating blocks + n_blocks = 0 + if count_blocks: + gating_threshold = self._parse_float( + output[self._find_line( + output, b" Threshold:", + start_line=line_integrated_loudness, + )] + ) + for line in output: + if not line.startswith(b"[Parsed_ebur128"): + continue + if line.endswith(b"Summary:"): + continue + line = line.split(b"M:", 1) + if len(line) < 2: + continue + if self._parse_float(b"M: " + line[1]) >= gating_threshold: + n_blocks += 1 + self._log.debug( + "{}: {} blocks over {} LUFS" + .format(item, n_blocks, gating_threshold) + ) + + self._log.debug( + "{}: gain {} LU, peak {}" + .format(item, gain, peak) + ) + + return Gain(gain, peak), n_blocks + + def _find_line(self, output, search, start_line=0, step_size=1): + """Return index of line beginning with `search`. 
+ + Begins searching at index `start_line` in `output`. + """ + end_index = len(output) if step_size > 0 else -1 + for i in range(start_line, end_index, step_size): + if output[i].startswith(search): + return i + raise ReplayGainError( + "ffmpeg output: missing {} after line {}" + .format(repr(search), start_line) + ) + + def _parse_float(self, line): + """Extract a float from a key value pair in `line`. + + This format is expected: /[^:]:[[:space:]]*value.*/, where `value` is + the float. + """ + # extract value + value = line.split(b":", 1) + if len(value) < 2: raise ReplayGainError( - u'unrecognized filename in bs1770gain output ' - '(bs1770gain can only deal with utf-8 file names)') - if is_album: - out.append(album_gain["album"]) - return out + "ffmpeg output: expected key value pair, found {}" + .format(line) + ) + value = value[1].lstrip() + # strip unit + value = value.split(b" ", 1)[0] + # cast value to float + try: + return float(value) + except ValueError: + raise ReplayGainError( + "ffmpeg output: expected float value, found {}" + .format(value) + ) # mpgain/aacgain CLI tool backend. class CommandBackend(Backend): + do_parallel = True def __init__(self, config, log): - super(CommandBackend, self).__init__(config, log) + super().__init__(config, log) config.add({ - 'command': u"", + 'command': "", 'noclip': True, }) @@ -288,7 +394,7 @@ class CommandBackend(Backend): # Explicit executable path. 
if not os.path.isfile(self.command): raise FatalReplayGainError( - u'replaygain command does not exist: {0}'.format( + 'replaygain command does not exist: {}'.format( self.command) ) else: @@ -301,34 +407,32 @@ class CommandBackend(Backend): pass if not self.command: raise FatalReplayGainError( - u'no replaygain command found: install mp3gain or aacgain' + 'no replaygain command found: install mp3gain or aacgain' ) self.noclip = config['noclip'].get(bool) - target_level = config['targetlevel'].as_number() - self.gain_offset = int(target_level - 89) - def compute_track_gain(self, items): + def compute_track_gain(self, items, target_level, peak): """Computes the track gain of the given tracks, returns a list of TrackGain objects. """ supported_items = list(filter(self.format_supported, items)) - output = self.compute_gain(supported_items, False) + output = self.compute_gain(supported_items, target_level, False) return output - def compute_album_gain(self, album): + def compute_album_gain(self, items, target_level, peak): """Computes the album gain of the given album, returns an AlbumGain object. """ # TODO: What should be done when not all tracks in the album are # supported? 
- supported_items = list(filter(self.format_supported, album.items())) - if len(supported_items) != len(album.items()): - self._log.debug(u'tracks are of unsupported format') + supported_items = list(filter(self.format_supported, items)) + if len(supported_items) != len(items): + self._log.debug('tracks are of unsupported format') return AlbumGain(None, []) - output = self.compute_gain(supported_items, True) + output = self.compute_gain(supported_items, target_level, True) return AlbumGain(output[-1], output[:-1]) def format_supported(self, item): @@ -340,7 +444,7 @@ class CommandBackend(Backend): return False return True - def compute_gain(self, items, is_album): + def compute_gain(self, items, target_level, is_album): """Computes the track or album gain of a list of items, returns a list of TrackGain objects. @@ -348,7 +452,7 @@ class CommandBackend(Backend): the album gain """ if len(items) == 0: - self._log.debug(u'no supported tracks to analyze') + self._log.debug('no supported tracks to analyze') return [] """Compute ReplayGain values and return a list of results @@ -367,13 +471,13 @@ class CommandBackend(Backend): else: # Disable clipping warning. 
cmd = cmd + ['-c'] - cmd = cmd + ['-d', str(self.gain_offset)] + cmd = cmd + ['-d', str(int(target_level - 89))] cmd = cmd + [syspath(i.path) for i in items] - self._log.debug(u'analyzing {0} files', len(items)) - self._log.debug(u"executing {0}", " ".join(map(displayable_path, cmd))) - output = call(cmd) - self._log.debug(u'analysis finished') + self._log.debug('analyzing {0} files', len(items)) + self._log.debug("executing {0}", " ".join(map(displayable_path, cmd))) + output = call(cmd).stdout + self._log.debug('analysis finished') return self.parse_tool_output(output, len(items) + (1 if is_album else 0)) @@ -386,8 +490,8 @@ class CommandBackend(Backend): for line in text.split(b'\n')[1:num_lines + 1]: parts = line.split(b'\t') if len(parts) != 6 or parts[0] == b'File': - self._log.debug(u'bad tool output: {0}', text) - raise ReplayGainError(u'mp3gain failed') + self._log.debug('bad tool output: {0}', text) + raise ReplayGainError('mp3gain failed') d = { 'file': parts[0], 'mp3gain': int(parts[1]), @@ -404,9 +508,8 @@ class CommandBackend(Backend): # GStreamer-based backend. class GStreamerBackend(Backend): - def __init__(self, config, log): - super(GStreamerBackend, self).__init__(config, log) + super().__init__(config, log) self._import_gst() # Initialized a GStreamer pipeline of the form filesrc -> @@ -423,15 +526,13 @@ class GStreamerBackend(Backend): if self._src is None or self._decbin is None or self._conv is None \ or self._res is None or self._rg is None: raise FatalGstreamerPluginReplayGainError( - u"Failed to load required GStreamer plugins" + "Failed to load required GStreamer plugins" ) # We check which files need gain ourselves, so all files given # to rganalsys should have their gain computed, even if it # already exists. 
self._rg.set_property("forced", True) - self._rg.set_property("reference-level", - config["targetlevel"].as_number()) self._sink = self.Gst.ElementFactory.make("fakesink", "sink") self._pipe = self.Gst.Pipeline() @@ -470,14 +571,14 @@ class GStreamerBackend(Backend): import gi except ImportError: raise FatalReplayGainError( - u"Failed to load GStreamer: python-gi not found" + "Failed to load GStreamer: python-gi not found" ) try: gi.require_version('Gst', '1.0') except ValueError as e: raise FatalReplayGainError( - u"Failed to load GStreamer 1.0: {0}".format(e) + f"Failed to load GStreamer 1.0: {e}" ) from gi.repository import GObject, Gst, GLib @@ -492,7 +593,7 @@ class GStreamerBackend(Backend): self.GLib = GLib self.Gst = Gst - def compute(self, files, album): + def compute(self, files, target_level, album): self._error = None self._files = list(files) @@ -501,6 +602,8 @@ class GStreamerBackend(Backend): self._file_tags = collections.defaultdict(dict) + self._rg.set_property("reference-level", target_level) + if album: self._rg.set_property("num-tracks", len(self._files)) @@ -509,10 +612,10 @@ class GStreamerBackend(Backend): if self._error is not None: raise self._error - def compute_track_gain(self, items): - self.compute(items, False) + def compute_track_gain(self, items, target_level, peak): + self.compute(items, target_level, False) if len(self._file_tags) != len(items): - raise ReplayGainError(u"Some tracks did not receive tags") + raise ReplayGainError("Some tracks did not receive tags") ret = [] for item in items: @@ -521,11 +624,11 @@ class GStreamerBackend(Backend): return ret - def compute_album_gain(self, album): - items = list(album.items()) - self.compute(items, True) + def compute_album_gain(self, items, target_level, peak): + items = list(items) + self.compute(items, target_level, True) if len(self._file_tags) != len(items): - raise ReplayGainError(u"Some items in album did not receive tags") + raise ReplayGainError("Some items in album did not 
receive tags") # Collect track gains. track_gains = [] @@ -534,7 +637,7 @@ class GStreamerBackend(Backend): gain = self._file_tags[item]["TRACK_GAIN"] peak = self._file_tags[item]["TRACK_PEAK"] except KeyError: - raise ReplayGainError(u"results missing for track") + raise ReplayGainError("results missing for track") track_gains.append(Gain(gain, peak)) # Get album gain information from the last track. @@ -543,7 +646,7 @@ class GStreamerBackend(Backend): gain = last_tags["ALBUM_GAIN"] peak = last_tags["ALBUM_PEAK"] except KeyError: - raise ReplayGainError(u"results missing for album") + raise ReplayGainError("results missing for album") return AlbumGain(Gain(gain, peak), track_gains) @@ -565,7 +668,7 @@ class GStreamerBackend(Backend): f = self._src.get_property("location") # A GStreamer error, either an unsupported format or a bug. self._error = ReplayGainError( - u"Error {0!r} - {1!r} on file {2!r}".format(err, debug, f) + f"Error {err!r} - {debug!r} on file {f!r}" ) def _on_tag(self, bus, message): @@ -678,7 +781,7 @@ class AudioToolsBackend(Backend): """ def __init__(self, config, log): - super(AudioToolsBackend, self).__init__(config, log) + super().__init__(config, log) self._import_audiotools() def _import_audiotools(self): @@ -692,7 +795,7 @@ class AudioToolsBackend(Backend): import audiotools.replaygain except ImportError: raise FatalReplayGainError( - u"Failed to load audiotools: audiotools not found" + "Failed to load audiotools: audiotools not found" ) self._mod_audiotools = audiotools self._mod_replaygain = audiotools.replaygain @@ -707,14 +810,14 @@ class AudioToolsBackend(Backend): file format is not supported """ try: - audiofile = self._mod_audiotools.open(item.path) - except IOError: + audiofile = self._mod_audiotools.open(py3_path(syspath(item.path))) + except OSError: raise ReplayGainError( - u"File {} was not found".format(item.path) + f"File {item.path} was not found" ) except self._mod_audiotools.UnsupportedFile: raise ReplayGainError( - 
u"Unsupported file type {}".format(item.format) + f"Unsupported file type {item.format}" ) return audiofile @@ -733,18 +836,25 @@ class AudioToolsBackend(Backend): rg = self._mod_replaygain.ReplayGain(audiofile.sample_rate()) except ValueError: raise ReplayGainError( - u"Unsupported sample rate {}".format(item.samplerate)) + f"Unsupported sample rate {item.samplerate}") return return rg - def compute_track_gain(self, items): + def compute_track_gain(self, items, target_level, peak): """Compute ReplayGain values for the requested items. :return list: list of :class:`Gain` objects """ - return [self._compute_track_gain(item) for item in items] + return [self._compute_track_gain(item, target_level) for item in items] - def _title_gain(self, rg, audiofile): + def _with_target_level(self, gain, target_level): + """Return `gain` relative to `target_level`. + + Assumes `gain` is relative to 89 db. + """ + return gain + (target_level - 89) + + def _title_gain(self, rg, audiofile, target_level): """Get the gain result pair from PyAudioTools using the `ReplayGain` instance `rg` for the given `audiofile`. @@ -754,14 +864,15 @@ class AudioToolsBackend(Backend): try: # The method needs an audiotools.PCMReader instance that can # be obtained from an audiofile instance. - return rg.title_gain(audiofile.to_pcm()) + gain, peak = rg.title_gain(audiofile.to_pcm()) except ValueError as exc: # `audiotools.replaygain` can raise a `ValueError` if the sample # rate is incorrect. - self._log.debug(u'error in rg.title_gain() call: {}', exc) - raise ReplayGainError(u'audiotools audio data error') + self._log.debug('error in rg.title_gain() call: {}', exc) + raise ReplayGainError('audiotools audio data error') + return self._with_target_level(gain, target_level), peak - def _compute_track_gain(self, item): + def _compute_track_gain(self, item, target_level): """Compute ReplayGain value for the requested item. 
:rtype: :class:`Gain` @@ -771,41 +882,44 @@ class AudioToolsBackend(Backend): # Each call to title_gain on a ReplayGain object returns peak and gain # of the track. - rg_track_gain, rg_track_peak = self._title_gain(rg, audiofile) + rg_track_gain, rg_track_peak = self._title_gain( + rg, audiofile, target_level + ) - self._log.debug(u'ReplayGain for track {0} - {1}: {2:.2f}, {3:.2f}', + self._log.debug('ReplayGain for track {0} - {1}: {2:.2f}, {3:.2f}', item.artist, item.title, rg_track_gain, rg_track_peak) return Gain(gain=rg_track_gain, peak=rg_track_peak) - def compute_album_gain(self, album): + def compute_album_gain(self, items, target_level, peak): """Compute ReplayGain values for the requested album and its items. :rtype: :class:`AlbumGain` """ - self._log.debug(u'Analysing album {0}', album) - # The first item is taken and opened to get the sample rate to # initialize the replaygain object. The object is used for all the # tracks in the album to get the album values. - item = list(album.items())[0] + item = list(items)[0] audiofile = self.open_audio_file(item) rg = self.init_replaygain(audiofile, item) track_gains = [] - for item in album.items(): + for item in items: audiofile = self.open_audio_file(item) - rg_track_gain, rg_track_peak = self._title_gain(rg, audiofile) + rg_track_gain, rg_track_peak = self._title_gain( + rg, audiofile, target_level + ) track_gains.append( Gain(gain=rg_track_gain, peak=rg_track_peak) ) - self._log.debug(u'ReplayGain for track {0}: {1:.2f}, {2:.2f}', + self._log.debug('ReplayGain for track {0}: {1:.2f}, {2:.2f}', item, rg_track_gain, rg_track_peak) # After getting the values for all tracks, it's possible to get the # album values. 
rg_album_gain, rg_album_peak = rg.album_gain() - self._log.debug(u'ReplayGain for album {0}: {1:.2f}, {2:.2f}', - album, rg_album_gain, rg_album_peak) + rg_album_gain = self._with_target_level(rg_album_gain, target_level) + self._log.debug('ReplayGain for album {0}: {1:.2f}, {2:.2f}', + items[0].album, rg_album_gain, rg_album_peak) return AlbumGain( Gain(gain=rg_album_gain, peak=rg_album_peak), @@ -813,6 +927,33 @@ class AudioToolsBackend(Backend): ) +class ExceptionWatcher(Thread): + """Monitors a queue for exceptions asynchronously. + Once an exception occurs, raise it and execute a callback. + """ + + def __init__(self, queue, callback): + self._queue = queue + self._callback = callback + self._stopevent = Event() + Thread.__init__(self) + + def run(self): + while not self._stopevent.is_set(): + try: + exc = self._queue.get_nowait() + self._callback() + raise exc[1].with_traceback(exc[2]) + except queue.Empty: + # No exceptions yet, loop back to check + # whether `_stopevent` is set + pass + + def join(self, timeout=None): + self._stopevent.set() + Thread.join(self, timeout) + + # Main plugin logic. class ReplayGainPlugin(BeetsPlugin): @@ -823,48 +964,72 @@ class ReplayGainPlugin(BeetsPlugin): "command": CommandBackend, "gstreamer": GStreamerBackend, "audiotools": AudioToolsBackend, - "bs1770gain": Bs1770gainBackend, + "ffmpeg": FfmpegBackend, + } + + peak_methods = { + "true": Peak.true, + "sample": Peak.sample, } def __init__(self): - super(ReplayGainPlugin, self).__init__() + super().__init__() # default backend is 'command' for backward-compatibility. 
self.config.add({ 'overwrite': False, 'auto': True, - 'backend': u'command', + 'backend': 'command', + 'threads': cpu_count(), + 'parallel_on_import': False, + 'per_disc': False, + 'peak': 'true', 'targetlevel': 89, 'r128': ['Opus'], + 'r128_targetlevel': lufs_to_db(-23), }) self.overwrite = self.config['overwrite'].get(bool) - backend_name = self.config['backend'].as_str() - if backend_name not in self.backends: + self.per_disc = self.config['per_disc'].get(bool) + + # Remember which backend is used for CLI feedback + self.backend_name = self.config['backend'].as_str() + + if self.backend_name not in self.backends: raise ui.UserError( - u"Selected ReplayGain backend {0} is not supported. " - u"Please select one of: {1}".format( - backend_name, - u', '.join(self.backends.keys()) + "Selected ReplayGain backend {} is not supported. " + "Please select one of: {}".format( + self.backend_name, + ', '.join(self.backends.keys()) ) ) + peak_method = self.config["peak"].as_str() + if peak_method not in self.peak_methods: + raise ui.UserError( + "Selected ReplayGain peak method {} is not supported. " + "Please select one of: {}".format( + peak_method, + ', '.join(self.peak_methods.keys()) + ) + ) + self._peak_method = self.peak_methods[peak_method] # On-import analysis. if self.config['auto']: + self.register_listener('import_begin', self.import_begin) + self.register_listener('import', self.import_end) self.import_stages = [self.imported] # Formats to use R128. 
self.r128_whitelist = self.config['r128'].as_str_seq() try: - self.backend_instance = self.backends[backend_name]( + self.backend_instance = self.backends[self.backend_name]( self.config, self._log ) except (ReplayGainError, FatalReplayGainError) as e: raise ui.UserError( - u'replaygain initialization failed: {0}'.format(e)) - - self.r128_backend_instance = '' + f'replaygain initialization failed: {e}') def should_use_r128(self, item): """Checks the plugin setting to decide whether the calculation @@ -895,29 +1060,47 @@ class ReplayGainPlugin(BeetsPlugin): item.rg_track_gain = track_gain.gain item.rg_track_peak = track_gain.peak item.store() - - self._log.debug(u'applied track gain {0}, peak {1}', + self._log.debug('applied track gain {0} LU, peak {1} of FS', item.rg_track_gain, item.rg_track_peak) + def store_album_gain(self, item, album_gain): + item.rg_album_gain = album_gain.gain + item.rg_album_peak = album_gain.peak + item.store() + self._log.debug('applied album gain {0} LU, peak {1} of FS', + item.rg_album_gain, item.rg_album_peak) + def store_track_r128_gain(self, item, track_gain): - item.r128_track_gain = int(round(track_gain.gain * pow(2, 8))) + item.r128_track_gain = track_gain.gain item.store() - self._log.debug(u'applied track gain {0}', item.r128_track_gain) + self._log.debug('applied r128 track gain {0} LU', + item.r128_track_gain) - def store_album_gain(self, album, album_gain): - album.rg_album_gain = album_gain.gain - album.rg_album_peak = album_gain.peak - album.store() + def store_album_r128_gain(self, item, album_gain): + item.r128_album_gain = album_gain.gain + item.store() + self._log.debug('applied r128 album gain {0} LU', + item.r128_album_gain) - self._log.debug(u'applied album gain {0}, peak {1}', - album.rg_album_gain, album.rg_album_peak) + def tag_specific_values(self, items): + """Return some tag specific values. 
- def store_album_r128_gain(self, album, album_gain): - album.r128_album_gain = int(round(album_gain.gain * pow(2, 8))) - album.store() + Returns a tuple (store_track_gain, store_album_gain, target_level, + peak_method). + """ + if any([self.should_use_r128(item) for item in items]): + store_track_gain = self.store_track_r128_gain + store_album_gain = self.store_album_r128_gain + target_level = self.config['r128_targetlevel'].as_number() + peak = Peak.none # R128_* tags do not store the track/album peak + else: + store_track_gain = self.store_track_gain + store_album_gain = self.store_album_gain + target_level = self.config['targetlevel'].as_number() + peak = self._peak_method - self._log.debug(u'applied album gain {0}', album.r128_album_gain) + return store_track_gain, store_album_gain, target_level, peak def handle_album(self, album, write, force=False): """Compute album and track replay gain store it in all of the @@ -928,47 +1111,65 @@ class ReplayGainPlugin(BeetsPlugin): items, nothing is done. 
""" if not force and not self.album_requires_gain(album): - self._log.info(u'Skipping album {0}', album) + self._log.info('Skipping album {0}', album) return - self._log.info(u'analyzing {0}', album) - if (any([self.should_use_r128(item) for item in album.items()]) and not - all(([self.should_use_r128(item) for item in album.items()]))): - raise ReplayGainError( - u"Mix of ReplayGain and EBU R128 detected" - u" for some tracks in album {0}".format(album) - ) + all([self.should_use_r128(item) for item in album.items()])): + self._log.error( + "Cannot calculate gain for album {0} (incompatible formats)", + album) + return - if any([self.should_use_r128(item) for item in album.items()]): - if self.r128_backend_instance == '': - self.init_r128_backend() - backend_instance = self.r128_backend_instance - store_track_gain = self.store_track_r128_gain - store_album_gain = self.store_album_r128_gain + self._log.info('analyzing {0}', album) + + tag_vals = self.tag_specific_values(album.items()) + store_track_gain, store_album_gain, target_level, peak = tag_vals + + discs = {} + if self.per_disc: + for item in album.items(): + if discs.get(item.disc) is None: + discs[item.disc] = [] + discs[item.disc].append(item) else: - backend_instance = self.backend_instance - store_track_gain = self.store_track_gain - store_album_gain = self.store_album_gain + discs[1] = album.items() - try: - album_gain = backend_instance.compute_album_gain(album) - if len(album_gain.track_gains) != len(album.items()): - raise ReplayGainError( - u"ReplayGain backend failed " - u"for some tracks in album {0}".format(album) + for discnumber, items in discs.items(): + def _store_album(album_gain): + if not album_gain or not album_gain.album_gain \ + or len(album_gain.track_gains) != len(items): + # In some cases, backends fail to produce a valid + # `album_gain` without throwing FatalReplayGainError + # => raise non-fatal exception & continue + raise ReplayGainError( + "ReplayGain backend `{}` failed " + 
"for some tracks in album {}" + .format(self.backend_name, album) + ) + for item, track_gain in zip(items, + album_gain.track_gains): + store_track_gain(item, track_gain) + store_album_gain(item, album_gain.album_gain) + if write: + item.try_write() + self._log.debug('done analyzing {0}', item) + + try: + self._apply( + self.backend_instance.compute_album_gain, args=(), + kwds={ + "items": list(items), + "target_level": target_level, + "peak": peak + }, + callback=_store_album ) - - store_album_gain(album, album_gain.album_gain) - for item, track_gain in zip(album.items(), album_gain.track_gains): - store_track_gain(item, track_gain) - if write: - item.try_write() - except ReplayGainError as e: - self._log.info(u"ReplayGain error: {0}", e) - except FatalReplayGainError as e: - raise ui.UserError( - u"Fatal replay gain error: {0}".format(e)) + except ReplayGainError as e: + self._log.info("ReplayGain error: {0}", e) + except FatalReplayGainError as e: + raise ui.UserError( + f"Fatal replay gain error: {e}") def handle_track(self, item, write, force=False): """Compute track replay gain and store it in the item. @@ -978,83 +1179,190 @@ class ReplayGainPlugin(BeetsPlugin): in the item, nothing is done. 
""" if not force and not self.track_requires_gain(item): - self._log.info(u'Skipping track {0}', item) + self._log.info('Skipping track {0}', item) return - self._log.info(u'analyzing {0}', item) + tag_vals = self.tag_specific_values([item]) + store_track_gain, store_album_gain, target_level, peak = tag_vals - if self.should_use_r128(item): - if self.r128_backend_instance == '': - self.init_r128_backend() - backend_instance = self.r128_backend_instance - store_track_gain = self.store_track_r128_gain - else: - backend_instance = self.backend_instance - store_track_gain = self.store_track_gain - - try: - track_gains = backend_instance.compute_track_gain([item]) - if len(track_gains) != 1: + def _store_track(track_gains): + if not track_gains or len(track_gains) != 1: + # In some cases, backends fail to produce a valid + # `track_gains` without throwing FatalReplayGainError + # => raise non-fatal exception & continue raise ReplayGainError( - u"ReplayGain backend failed for track {0}".format(item) + "ReplayGain backend `{}` failed for track {}" + .format(self.backend_name, item) ) store_track_gain(item, track_gains[0]) if write: item.try_write() - except ReplayGainError as e: - self._log.info(u"ReplayGain error: {0}", e) - except FatalReplayGainError as e: - raise ui.UserError( - u"Fatal replay gain error: {0}".format(e)) - - def init_r128_backend(self): - backend_name = 'bs1770gain' + self._log.debug('done analyzing {0}', item) try: - self.r128_backend_instance = self.backends[backend_name]( - self.config, self._log + self._apply( + self.backend_instance.compute_track_gain, args=(), + kwds={ + "items": [item], + "target_level": target_level, + "peak": peak, + }, + callback=_store_track ) - except (ReplayGainError, FatalReplayGainError) as e: - raise ui.UserError( - u'replaygain initialization failed: {0}'.format(e)) + except ReplayGainError as e: + self._log.info("ReplayGain error: {0}", e) + except FatalReplayGainError as e: + raise ui.UserError(f"Fatal replay gain 
error: {e}") - self.r128_backend_instance.method = '--ebu' + def _has_pool(self): + """Check whether a `ThreadPool` is running instance in `self.pool` + """ + if hasattr(self, 'pool'): + if isinstance(self.pool, ThreadPool) and self.pool._state == RUN: + return True + return False + + def open_pool(self, threads): + """Open a `ThreadPool` instance in `self.pool` + """ + if not self._has_pool() and self.backend_instance.do_parallel: + self.pool = ThreadPool(threads) + self.exc_queue = queue.Queue() + + signal.signal(signal.SIGINT, self._interrupt) + + self.exc_watcher = ExceptionWatcher( + self.exc_queue, # threads push exceptions here + self.terminate_pool # abort once an exception occurs + ) + self.exc_watcher.start() + + def _apply(self, func, args, kwds, callback): + if self._has_pool(): + def catch_exc(func, exc_queue, log): + """Wrapper to catch raised exceptions in threads + """ + def wfunc(*args, **kwargs): + try: + return func(*args, **kwargs) + except ReplayGainError as e: + log.info(e.args[0]) # log non-fatal exceptions + except Exception: + exc_queue.put(sys.exc_info()) + return wfunc + + # Wrap function and callback to catch exceptions + func = catch_exc(func, self.exc_queue, self._log) + callback = catch_exc(callback, self.exc_queue, self._log) + + self.pool.apply_async(func, args, kwds, callback) + else: + callback(func(*args, **kwds)) + + def terminate_pool(self): + """Terminate the `ThreadPool` instance in `self.pool` + (e.g. 
stop execution in case of exception) + """ + # Don't call self._as_pool() here, + # self.pool._state may not be == RUN + if hasattr(self, 'pool') and isinstance(self.pool, ThreadPool): + self.pool.terminate() + self.pool.join() + # self.exc_watcher.join() + + def _interrupt(self, signal, frame): + try: + self._log.info('interrupted') + self.terminate_pool() + sys.exit(0) + except SystemExit: + # Silence raised SystemExit ~ exit(0) + pass + + def close_pool(self): + """Close the `ThreadPool` instance in `self.pool` (if there is one) + """ + if self._has_pool(): + self.pool.close() + self.pool.join() + self.exc_watcher.join() + + def import_begin(self, session): + """Handle `import_begin` event -> open pool + """ + threads = self.config['threads'].get(int) + + if self.config['parallel_on_import'] \ + and self.config['auto'] \ + and threads: + self.open_pool(threads) + + def import_end(self, paths): + """Handle `import` event -> close pool + """ + self.close_pool() def imported(self, session, task): """Add replay gain info to items or albums of ``task``. """ - if task.is_album: - self.handle_album(task.album, False) - else: - self.handle_track(task.item, False) + if self.config['auto']: + if task.is_album: + self.handle_album(task.album, False) + else: + self.handle_track(task.item, False) + + def command_func(self, lib, opts, args): + try: + write = ui.should_write(opts.write) + force = opts.force + + # Bypass self.open_pool() if called with `--threads 0` + if opts.threads != 0: + threads = opts.threads or self.config['threads'].get(int) + self.open_pool(threads) + + if opts.album: + albums = lib.albums(ui.decargs(args)) + self._log.info( + "Analyzing {} albums ~ {} backend..." + .format(len(albums), self.backend_name) + ) + for album in albums: + self.handle_album(album, write, force) + else: + items = lib.items(ui.decargs(args)) + self._log.info( + "Analyzing {} tracks ~ {} backend..." 
+ .format(len(items), self.backend_name) + ) + for item in items: + self.handle_track(item, write, force) + + self.close_pool() + except (SystemExit, KeyboardInterrupt): + # Silence interrupt exceptions + pass def commands(self): """Return the "replaygain" ui subcommand. """ - def func(lib, opts, args): - write = ui.should_write(opts.write) - force = opts.force - - if opts.album: - for album in lib.albums(ui.decargs(args)): - self.handle_album(album, write, force) - - else: - for item in lib.items(ui.decargs(args)): - self.handle_track(item, write, force) - - cmd = ui.Subcommand('replaygain', help=u'analyze for ReplayGain') + cmd = ui.Subcommand('replaygain', help='analyze for ReplayGain') cmd.parser.add_album_option() + cmd.parser.add_option( + "-t", "--threads", dest="threads", type=int, + help='change the number of threads, \ + defaults to maximum available processors' + ) cmd.parser.add_option( "-f", "--force", dest="force", action="store_true", default=False, - help=u"analyze all files, including those that " + help="analyze all files, including those that " "already have ReplayGain metadata") cmd.parser.add_option( "-w", "--write", default=None, action="store_true", - help=u"write new metadata to files' tags") + help="write new metadata to files' tags") cmd.parser.add_option( "-W", "--nowrite", dest="write", action="store_false", - help=u"don't write metadata (opposite of -w)") - cmd.func = func + help="don't write metadata (opposite of -w)") + cmd.func = self.command_func return [cmd] diff --git a/libs/common/beetsplug/rewrite.py b/libs/common/beetsplug/rewrite.py index eadb1425..e02e4080 100644 --- a/libs/common/beetsplug/rewrite.py +++ b/libs/common/beetsplug/rewrite.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """Uses user-specified rewriting rules to canonicalize names for path formats. 
""" -from __future__ import division, absolute_import, print_function import re from collections import defaultdict @@ -44,7 +42,7 @@ def rewriter(field, rules): class RewritePlugin(BeetsPlugin): def __init__(self): - super(RewritePlugin, self).__init__() + super().__init__() self.config.add({}) @@ -55,11 +53,11 @@ class RewritePlugin(BeetsPlugin): try: fieldname, pattern = key.split(None, 1) except ValueError: - raise ui.UserError(u"invalid rewrite specification") + raise ui.UserError("invalid rewrite specification") if fieldname not in library.Item._fields: - raise ui.UserError(u"invalid field name (%s) in rewriter" % + raise ui.UserError("invalid field name (%s) in rewriter" % fieldname) - self._log.debug(u'adding template field {0}', key) + self._log.debug('adding template field {0}', key) pattern = re.compile(pattern.lower()) rules[fieldname].append((pattern, value)) if fieldname == 'artist': diff --git a/libs/common/beetsplug/scrub.py b/libs/common/beetsplug/scrub.py index be6e7fd1..d8044668 100644 --- a/libs/common/beetsplug/scrub.py +++ b/libs/common/beetsplug/scrub.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,13 +16,12 @@ automatically whenever tags are written. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui from beets import util from beets import config -from beets import mediafile +import mediafile import mutagen _MUTAGEN_FORMATS = { @@ -48,7 +46,7 @@ _MUTAGEN_FORMATS = { class ScrubPlugin(BeetsPlugin): """Removes extraneous metadata from files' tags.""" def __init__(self): - super(ScrubPlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, }) @@ -60,15 +58,15 @@ class ScrubPlugin(BeetsPlugin): def scrub_func(lib, opts, args): # Walk through matching files and remove tags. 
for item in lib.items(ui.decargs(args)): - self._log.info(u'scrubbing: {0}', + self._log.info('scrubbing: {0}', util.displayable_path(item.path)) self._scrub_item(item, opts.write) - scrub_cmd = ui.Subcommand('scrub', help=u'clean audio tags') + scrub_cmd = ui.Subcommand('scrub', help='clean audio tags') scrub_cmd.parser.add_option( - u'-W', u'--nowrite', dest='write', + '-W', '--nowrite', dest='write', action='store_false', default=True, - help=u'leave tags empty') + help='leave tags empty') scrub_cmd.func = scrub_func return [scrub_cmd] @@ -79,7 +77,7 @@ class ScrubPlugin(BeetsPlugin): """ classes = [] for modname, clsname in _MUTAGEN_FORMATS.items(): - mod = __import__('mutagen.{0}'.format(modname), + mod = __import__(f'mutagen.{modname}', fromlist=[clsname]) classes.append(getattr(mod, clsname)) return classes @@ -107,8 +105,8 @@ class ScrubPlugin(BeetsPlugin): for tag in f.keys(): del f[tag] f.save() - except (IOError, mutagen.MutagenError) as exc: - self._log.error(u'could not scrub {0}: {1}', + except (OSError, mutagen.MutagenError) as exc: + self._log.error('could not scrub {0}: {1}', util.displayable_path(path), exc) def _scrub_item(self, item, restore=True): @@ -121,7 +119,7 @@ class ScrubPlugin(BeetsPlugin): mf = mediafile.MediaFile(util.syspath(item.path), config['id3v23'].get(bool)) except mediafile.UnreadableFileError as exc: - self._log.error(u'could not open file to scrub: {0}', + self._log.error('could not open file to scrub: {0}', exc) return images = mf.images @@ -131,21 +129,21 @@ class ScrubPlugin(BeetsPlugin): # Restore tags, if enabled. 
if restore: - self._log.debug(u'writing new tags after scrub') + self._log.debug('writing new tags after scrub') item.try_write() if images: - self._log.debug(u'restoring art') + self._log.debug('restoring art') try: mf = mediafile.MediaFile(util.syspath(item.path), config['id3v23'].get(bool)) mf.images = images mf.save() except mediafile.UnreadableFileError as exc: - self._log.error(u'could not write tags: {0}', exc) + self._log.error('could not write tags: {0}', exc) def import_task_files(self, session, task): """Automatically scrub imported files.""" for item in task.imported_items(): - self._log.debug(u'auto-scrubbing {0}', + self._log.debug('auto-scrubbing {0}', util.displayable_path(item.path)) self._scrub_item(item) diff --git a/libs/common/beetsplug/smartplaylist.py b/libs/common/beetsplug/smartplaylist.py index 009512c5..4c921ecc 100644 --- a/libs/common/beetsplug/smartplaylist.py +++ b/libs/common/beetsplug/smartplaylist.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Dang Mai . # @@ -16,30 +15,38 @@ """Generates smart playlists based on beets queries. 
""" -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui from beets.util import (mkdirall, normpath, sanitize_path, syspath, - bytestring_path) + bytestring_path, path_as_posix) from beets.library import Item, Album, parse_query_string from beets.dbcore import OrQuery from beets.dbcore.query import MultipleSort, ParsingError import os -import six + +try: + from urllib.request import pathname2url +except ImportError: + # python2 is a bit different + from urllib import pathname2url class SmartPlaylistPlugin(BeetsPlugin): def __init__(self): - super(SmartPlaylistPlugin, self).__init__() + super().__init__() self.config.add({ 'relative_to': None, - 'playlist_dir': u'.', + 'playlist_dir': '.', 'auto': True, - 'playlists': [] + 'playlists': [], + 'forward_slash': False, + 'prefix': '', + 'urlencode': False, }) + self.config['prefix'].redact = True # May contain username/password. self._matched_playlists = None self._unmatched_playlists = None @@ -49,8 +56,8 @@ class SmartPlaylistPlugin(BeetsPlugin): def commands(self): spl_update = ui.Subcommand( 'splupdate', - help=u'update the smart playlists. Playlist names may be ' - u'passed as arguments.' + help='update the smart playlists. Playlist names may be ' + 'passed as arguments.' 
) spl_update.func = self.update_cmd return [spl_update] @@ -61,14 +68,14 @@ class SmartPlaylistPlugin(BeetsPlugin): args = set(ui.decargs(args)) for a in list(args): if not a.endswith(".m3u"): - args.add("{0}.m3u".format(a)) + args.add(f"{a}.m3u") - playlists = set((name, q, a_q) - for name, q, a_q in self._unmatched_playlists - if name in args) + playlists = {(name, q, a_q) + for name, q, a_q in self._unmatched_playlists + if name in args} if not playlists: raise ui.UserError( - u'No playlist matching any of {0} found'.format( + 'No playlist matching any of {} found'.format( [name for name, _, _ in self._unmatched_playlists]) ) @@ -81,7 +88,7 @@ class SmartPlaylistPlugin(BeetsPlugin): def build_queries(self): """ - Instanciate queries for the playlists. + Instantiate queries for the playlists. Each playlist has 2 queries: one or items one for albums, each with a sort. We must also remember its name. _unmatched_playlists is a set of @@ -99,22 +106,23 @@ class SmartPlaylistPlugin(BeetsPlugin): for playlist in self.config['playlists'].get(list): if 'name' not in playlist: - self._log.warning(u"playlist configuration is missing name") + self._log.warning("playlist configuration is missing name") continue playlist_data = (playlist['name'],) try: - for key, Model in (('query', Item), ('album_query', Album)): + for key, model_cls in (('query', Item), + ('album_query', Album)): qs = playlist.get(key) if qs is None: query_and_sort = None, None - elif isinstance(qs, six.string_types): - query_and_sort = parse_query_string(qs, Model) + elif isinstance(qs, str): + query_and_sort = parse_query_string(qs, model_cls) elif len(qs) == 1: - query_and_sort = parse_query_string(qs[0], Model) + query_and_sort = parse_query_string(qs[0], model_cls) else: # multiple queries and sorts - queries, sorts = zip(*(parse_query_string(q, Model) + queries, sorts = zip(*(parse_query_string(q, model_cls) for q in qs)) query = OrQuery(queries) final_sorts = [] @@ -135,7 +143,7 @@ class 
SmartPlaylistPlugin(BeetsPlugin): playlist_data += (query_and_sort,) except ParsingError as exc: - self._log.warning(u"invalid query in playlist {}: {}", + self._log.warning("invalid query in playlist {}: {}", playlist['name'], exc) continue @@ -156,14 +164,14 @@ class SmartPlaylistPlugin(BeetsPlugin): n, (q, _), (a_q, _) = playlist if self.matches(model, q, a_q): self._log.debug( - u"{0} will be updated because of {1}", n, model) + "{0} will be updated because of {1}", n, model) self._matched_playlists.add(playlist) self.register_listener('cli_exit', self.update_playlists) self._unmatched_playlists -= self._matched_playlists def update_playlists(self, lib): - self._log.info(u"Updating {0} smart playlists...", + self._log.info("Updating {0} smart playlists...", len(self._matched_playlists)) playlist_dir = self.config['playlist_dir'].as_filename() @@ -177,7 +185,7 @@ class SmartPlaylistPlugin(BeetsPlugin): for playlist in self._matched_playlists: name, (query, q_sort), (album_query, a_q_sort) = playlist - self._log.debug(u"Creating playlist {0}", name) + self._log.debug("Creating playlist {0}", name) items = [] if query: @@ -199,6 +207,7 @@ class SmartPlaylistPlugin(BeetsPlugin): if item_path not in m3us[m3u_name]: m3us[m3u_name].append(item_path) + prefix = bytestring_path(self.config['prefix'].as_str()) # Write all of the accumulated track lists to files. 
for m3u in m3us: m3u_path = normpath(os.path.join(playlist_dir, @@ -206,6 +215,10 @@ class SmartPlaylistPlugin(BeetsPlugin): mkdirall(m3u_path) with open(syspath(m3u_path), 'wb') as f: for path in m3us[m3u]: - f.write(path + b'\n') + if self.config['forward_slash'].get(): + path = path_as_posix(path) + if self.config['urlencode']: + path = bytestring_path(pathname2url(path)) + f.write(prefix + path + b'\n') - self._log.info(u"{0} playlists updated", len(self._matched_playlists)) + self._log.info("{0} playlists updated", len(self._matched_playlists)) diff --git a/libs/common/beetsplug/sonosupdate.py b/libs/common/beetsplug/sonosupdate.py index 56a315a1..aeb211d8 100644 --- a/libs/common/beetsplug/sonosupdate.py +++ b/libs/common/beetsplug/sonosupdate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2018, Tobias Sauerwein. # @@ -16,7 +15,6 @@ """Updates a Sonos library whenever the beets library is changed. This is based on the Kodi Update plugin. """ -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin import soco @@ -24,7 +22,7 @@ import soco class SonosUpdate(BeetsPlugin): def __init__(self): - super(SonosUpdate, self).__init__() + super().__init__() self.register_listener('database_change', self.listen_for_db_change) def listen_for_db_change(self, lib, model): @@ -35,14 +33,14 @@ class SonosUpdate(BeetsPlugin): """When the client exists try to send refresh request to a Sonos controler. 
""" - self._log.info(u'Requesting a Sonos library update...') + self._log.info('Requesting a Sonos library update...') device = soco.discovery.any_soco() if device: device.music_library.start_library_update() else: - self._log.warning(u'Could not find a Sonos device.') + self._log.warning('Could not find a Sonos device.') return - self._log.info(u'Sonos update triggered') + self._log.info('Sonos update triggered') diff --git a/libs/common/beetsplug/spotify.py b/libs/common/beetsplug/spotify.py index 36231f29..2529160d 100644 --- a/libs/common/beetsplug/spotify.py +++ b/libs/common/beetsplug/spotify.py @@ -1,61 +1,379 @@ -# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function +"""Adds Spotify release and track search support to the autotagger, along with +Spotify playlist construction. 
+""" import re +import json +import base64 import webbrowser +import collections + +import unidecode import requests -from beets.plugins import BeetsPlugin -from beets.ui import decargs +import confuse + from beets import ui -from requests.exceptions import HTTPError +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin -class SpotifyPlugin(BeetsPlugin): +class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): + data_source = 'Spotify' - # URL for the Web API of Spotify - # Documentation here: https://developer.spotify.com/web-api/search-item/ - base_url = "https://api.spotify.com/v1/search" - open_url = "http://open.spotify.com/track/" - playlist_partial = "spotify:trackset:Playlist:" + # Base URLs for the Spotify API + # Documentation: https://developer.spotify.com/web-api + oauth_token_url = 'https://accounts.spotify.com/api/token' + open_track_url = 'https://open.spotify.com/track/' + search_url = 'https://api.spotify.com/v1/search' + album_url = 'https://api.spotify.com/v1/albums/' + track_url = 'https://api.spotify.com/v1/tracks/' + + # Spotify IDs consist of 22 alphanumeric characters + # (zero-left-padded base62 representation of randomly generated UUID4) + id_regex = { + 'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})', + 'match_group': 2, + } def __init__(self): - super(SpotifyPlugin, self).__init__() - self.config.add({ - 'mode': 'list', - 'tiebreak': 'popularity', - 'show_failures': False, - 'artist_field': 'albumartist', - 'album_field': 'album', - 'track_field': 'title', - 'region_filter': None, - 'regex': [] - }) + super().__init__() + self.config.add( + { + 'mode': 'list', + 'tiebreak': 'popularity', + 'show_failures': False, + 'artist_field': 'albumartist', + 'album_field': 'album', + 'track_field': 'title', + 'region_filter': None, + 'regex': [], + 'client_id': '4e414367a1d14c75a5c5129a627fcab8', + 'client_secret': 'f82bdc09b2254f1a8286815d02fd46dc', + 'tokenfile': 
'spotify_token.json', + } + ) + self.config['client_secret'].redact = True + + self.tokenfile = self.config['tokenfile'].get( + confuse.Filename(in_app_dir=True) + ) # Path to the JSON file for storing the OAuth access token. + self.setup() + + def setup(self): + """Retrieve previously saved OAuth token or generate a new one.""" + try: + with open(self.tokenfile) as f: + token_data = json.load(f) + except OSError: + self._authenticate() + else: + self.access_token = token_data['access_token'] + + def _authenticate(self): + """Request an access token via the Client Credentials Flow: + https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow + """ + headers = { + 'Authorization': 'Basic {}'.format( + base64.b64encode( + ':'.join( + self.config[k].as_str() + for k in ('client_id', 'client_secret') + ).encode() + ).decode() + ) + } + response = requests.post( + self.oauth_token_url, + data={'grant_type': 'client_credentials'}, + headers=headers, + ) + try: + response.raise_for_status() + except requests.exceptions.HTTPError as e: + raise ui.UserError( + 'Spotify authorization failed: {}\n{}'.format( + e, response.text + ) + ) + self.access_token = response.json()['access_token'] + + # Save the token for later use. + self._log.debug( + '{} access token: {}', self.data_source, self.access_token + ) + with open(self.tokenfile, 'w') as f: + json.dump({'access_token': self.access_token}, f) + + def _handle_response(self, request_type, url, params=None): + """Send a request, reauthenticating if necessary. + + :param request_type: Type of :class:`Request` constructor, + e.g. ``requests.get``, ``requests.post``, etc. + :type request_type: function + :param url: URL for the new :class:`Request` object. + :type url: str + :param params: (optional) list of tuples or bytes to send + in the query string for the :class:`Request`. + :type params: dict + :return: JSON data for the class:`Response ` object. 
+ :rtype: dict + """ + response = request_type( + url, + headers={'Authorization': f'Bearer {self.access_token}'}, + params=params, + ) + if response.status_code != 200: + if 'token expired' in response.text: + self._log.debug( + '{} access token has expired. Reauthenticating.', + self.data_source, + ) + self._authenticate() + return self._handle_response(request_type, url, params=params) + else: + raise ui.UserError( + '{} API error:\n{}\nURL:\n{}\nparams:\n{}'.format( + self.data_source, response.text, url, params + ) + ) + return response.json() + + def album_for_id(self, album_id): + """Fetch an album by its Spotify ID or URL and return an + AlbumInfo object or None if the album is not found. + + :param album_id: Spotify ID or URL for the album + :type album_id: str + :return: AlbumInfo object for album + :rtype: beets.autotag.hooks.AlbumInfo or None + """ + spotify_id = self._get_id('album', album_id) + if spotify_id is None: + return None + + album_data = self._handle_response( + requests.get, self.album_url + spotify_id + ) + artist, artist_id = self.get_artist(album_data['artists']) + + date_parts = [ + int(part) for part in album_data['release_date'].split('-') + ] + + release_date_precision = album_data['release_date_precision'] + if release_date_precision == 'day': + year, month, day = date_parts + elif release_date_precision == 'month': + year, month = date_parts + day = None + elif release_date_precision == 'year': + year = date_parts[0] + month = None + day = None + else: + raise ui.UserError( + "Invalid `release_date_precision` returned " + "by {} API: '{}'".format( + self.data_source, release_date_precision + ) + ) + + tracks = [] + medium_totals = collections.defaultdict(int) + for i, track_data in enumerate(album_data['tracks']['items'], start=1): + track = self._get_track(track_data) + track.index = i + medium_totals[track.medium] += 1 + tracks.append(track) + for track in tracks: + track.medium_total = medium_totals[track.medium] + + return 
AlbumInfo( + album=album_data['name'], + album_id=spotify_id, + artist=artist, + artist_id=artist_id, + tracks=tracks, + albumtype=album_data['album_type'], + va=len(album_data['artists']) == 1 + and artist.lower() == 'various artists', + year=year, + month=month, + day=day, + label=album_data['label'], + mediums=max(medium_totals.keys()), + data_source=self.data_source, + data_url=album_data['external_urls']['spotify'], + ) + + def _get_track(self, track_data): + """Convert a Spotify track object dict to a TrackInfo object. + + :param track_data: Simplified track object + (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified) + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo + """ + artist, artist_id = self.get_artist(track_data['artists']) + return TrackInfo( + title=track_data['name'], + track_id=track_data['id'], + artist=artist, + artist_id=artist_id, + length=track_data['duration_ms'] / 1000, + index=track_data['track_number'], + medium=track_data['disc_number'], + medium_index=track_data['track_number'], + data_source=self.data_source, + data_url=track_data['external_urls']['spotify'], + ) + + def track_for_id(self, track_id=None, track_data=None): + """Fetch a track by its Spotify ID or URL and return a + TrackInfo object or None if the track is not found. + + :param track_id: (Optional) Spotify ID or URL for the track. Either + ``track_id`` or ``track_data`` must be provided. + :type track_id: str + :param track_data: (Optional) Simplified track object dict. May be + provided instead of ``track_id`` to avoid unnecessary API calls. 
+ :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo or None + """ + if track_data is None: + spotify_id = self._get_id('track', track_id) + if spotify_id is None: + return None + track_data = self._handle_response( + requests.get, self.track_url + spotify_id + ) + track = self._get_track(track_data) + + # Get album's tracks to set `track.index` (position on the entire + # release) and `track.medium_total` (total number of tracks on + # the track's disc). + album_data = self._handle_response( + requests.get, self.album_url + track_data['album']['id'] + ) + medium_total = 0 + for i, track_data in enumerate(album_data['tracks']['items'], start=1): + if track_data['disc_number'] == track.medium: + medium_total += 1 + if track_data['id'] == track.track_id: + track.index = i + track.medium_total = medium_total + return track + + @staticmethod + def _construct_search_query(filters=None, keywords=''): + """Construct a query string with the specified filters and keywords to + be provided to the Spotify Search API + (https://developer.spotify.com/documentation/web-api/reference/search/search/#writing-a-query---guidelines). + + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: Query string to be provided to the Search API. + :rtype: str + """ + query_components = [ + keywords, + ' '.join(':'.join((k, v)) for k, v in filters.items()), + ] + query = ' '.join([q for q in query_components if q]) + if not isinstance(query, str): + query = query.decode('utf8') + return unidecode.unidecode(query) + + def _search_api(self, query_type, filters=None, keywords=''): + """Query the Spotify Search API for the specified ``keywords``, applying + the provided ``filters``. + + :param query_type: Item type to search across. Valid types are: + 'album', 'artist', 'playlist', and 'track'. 
+ :type query_type: str + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: JSON data for the class:`Response ` object or None + if no search results are returned. + :rtype: dict or None + """ + query = self._construct_search_query( + keywords=keywords, filters=filters + ) + if not query: + return None + self._log.debug( + f"Searching {self.data_source} for '{query}'" + ) + response_data = ( + self._handle_response( + requests.get, + self.search_url, + params={'q': query, 'type': query_type}, + ) + .get(query_type + 's', {}) + .get('items', []) + ) + self._log.debug( + "Found {} result(s) from {} for '{}'", + len(response_data), + self.data_source, + query, + ) + return response_data def commands(self): def queries(lib, opts, args): - success = self.parse_opts(opts) + success = self._parse_opts(opts) if success: - results = self.query_spotify(lib, decargs(args)) - self.output_results(results) + results = self._match_library_tracks(lib, ui.decargs(args)) + self._output_match_results(results) + spotify_cmd = ui.Subcommand( - 'spotify', - help=u'build a Spotify playlist' + 'spotify', help=f'build a {self.data_source} playlist' ) spotify_cmd.parser.add_option( - u'-m', u'--mode', action='store', - help=u'"open" to open Spotify with playlist, ' - u'"list" to print (default)' + '-m', + '--mode', + action='store', + help='"open" to open {} with playlist, ' + '"list" to print (default)'.format(self.data_source), ) spotify_cmd.parser.add_option( - u'-f', u'--show-failures', - action='store_true', dest='show_failures', - help=u'list tracks that did not match a Spotify ID' + '-f', + '--show-failures', + action='store_true', + dest='show_failures', + help='list tracks that did not match a {} ID'.format( + self.data_source + ), ) spotify_cmd.func = queries return [spotify_cmd] - def parse_opts(self, opts): + def _parse_opts(self, opts): if opts.mode: 
self.config['mode'].set(opts.mode) @@ -63,35 +381,47 @@ class SpotifyPlugin(BeetsPlugin): self.config['show_failures'].set(True) if self.config['mode'].get() not in ['list', 'open']: - self._log.warning(u'{0} is not a valid mode', - self.config['mode'].get()) + self._log.warning( + '{0} is not a valid mode', self.config['mode'].get() + ) return False self.opts = opts return True - def query_spotify(self, lib, query): + def _match_library_tracks(self, library, keywords): + """Get a list of simplified track object dicts for library tracks + matching the specified ``keywords``. + :param library: beets library object to query. + :type library: beets.library.Library + :param keywords: Query to match library items against. + :type keywords: str + :return: List of simplified track object dicts for library items + matching the specified query. + :rtype: list[dict] + """ results = [] failures = [] - items = lib.items(query) + items = library.items(keywords) if not items: - self._log.debug(u'Your beets query returned no items, ' - u'skipping spotify') + self._log.debug( + 'Your beets query returned no items, skipping {}.', + self.data_source, + ) return - self._log.info(u'Processing {0} tracks...', len(items)) + self._log.info('Processing {} tracks...', len(items)) for item in items: - # Apply regex transformations if provided for regex in self.config['regex'].get(): if ( - not regex['field'] or - not regex['search'] or - not regex['replace'] + not regex['field'] + or not regex['search'] + or not regex['replace'] ): continue @@ -103,73 +433,95 @@ class SpotifyPlugin(BeetsPlugin): # Custom values can be passed in the config (just in case) artist = item[self.config['artist_field'].get()] album = item[self.config['album_field'].get()] - query = item[self.config['track_field'].get()] - search_url = query + " album:" + album + " artist:" + artist + keywords = item[self.config['track_field'].get()] # Query the Web API for each track, look for the items' JSON data - r = 
requests.get(self.base_url, params={ - "q": search_url, "type": "track" - }) - self._log.debug('{}', r.url) - try: - r.raise_for_status() - except HTTPError as e: - self._log.debug(u'URL returned a {0} error', - e.response.status_code) - failures.append(search_url) + query_filters = {'artist': artist, 'album': album} + response_data_tracks = self._search_api( + query_type='track', keywords=keywords, filters=query_filters + ) + if not response_data_tracks: + query = self._construct_search_query( + keywords=keywords, filters=query_filters + ) + failures.append(query) continue - r_data = r.json()['tracks']['items'] - # Apply market filter if requested region_filter = self.config['region_filter'].get() if region_filter: - r_data = [x for x in r_data if region_filter - in x['available_markets']] + response_data_tracks = [ + track_data + for track_data in response_data_tracks + if region_filter in track_data['available_markets'] + ] - # Simplest, take the first result - chosen_result = None - if len(r_data) == 1 or self.config['tiebreak'].get() == "first": - self._log.debug(u'Spotify track(s) found, count: {0}', - len(r_data)) - chosen_result = r_data[0] - elif len(r_data) > 1: - # Use the popularity filter - self._log.debug(u'Most popular track chosen, count: {0}', - len(r_data)) - chosen_result = max(r_data, key=lambda x: x['popularity']) - - if chosen_result: - results.append(chosen_result) + if ( + len(response_data_tracks) == 1 + or self.config['tiebreak'].get() == 'first' + ): + self._log.debug( + '{} track(s) found, count: {}', + self.data_source, + len(response_data_tracks), + ) + chosen_result = response_data_tracks[0] else: - self._log.debug(u'No spotify track found: {0}', search_url) - failures.append(search_url) + # Use the popularity filter + self._log.debug( + 'Most popular track chosen, count: {}', + len(response_data_tracks), + ) + chosen_result = max( + response_data_tracks, key=lambda x: x['popularity'] + ) + results.append(chosen_result) failure_count 
= len(failures) if failure_count > 0: if self.config['show_failures'].get(): - self._log.info(u'{0} track(s) did not match a Spotify ID:', - failure_count) + self._log.info( + '{} track(s) did not match a {} ID:', + failure_count, + self.data_source, + ) for track in failures: - self._log.info(u'track: {0}', track) - self._log.info(u'') + self._log.info('track: {}', track) + self._log.info('') else: - self._log.warning(u'{0} track(s) did not match a Spotify ID;\n' - u'use --show-failures to display', - failure_count) + self._log.warning( + '{} track(s) did not match a {} ID:\n' + 'use --show-failures to display', + failure_count, + self.data_source, + ) return results - def output_results(self, results): - if results: - ids = [x['id'] for x in results] - if self.config['mode'].get() == "open": - self._log.info(u'Attempting to open Spotify with playlist') - spotify_url = self.playlist_partial + ",".join(ids) - webbrowser.open(spotify_url) + def _output_match_results(self, results): + """Open a playlist or print Spotify URLs for the provided track + object dicts. 
+ :param results: List of simplified track object dicts + (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified) + :type results: list[dict] + """ + if results: + spotify_ids = [track_data['id'] for track_data in results] + if self.config['mode'].get() == 'open': + self._log.info( + 'Attempting to open {} with playlist'.format( + self.data_source + ) + ) + spotify_url = 'spotify:trackset:Playlist:' + ','.join( + spotify_ids + ) + webbrowser.open(spotify_url) else: - for item in ids: - print(self.open_url + item) + for spotify_id in spotify_ids: + print(self.open_track_url + spotify_id) else: - self._log.warning(u'No Spotify tracks found from beets query') + self._log.warning( + f'No {self.data_source} tracks found from beets query' + ) diff --git a/libs/common/beetsplug/subsonicplaylist.py b/libs/common/beetsplug/subsonicplaylist.py new file mode 100644 index 00000000..ead78919 --- /dev/null +++ b/libs/common/beetsplug/subsonicplaylist.py @@ -0,0 +1,171 @@ +# This file is part of beets. +# Copyright 2019, Joris Jensen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ + +import random +import string +from xml.etree import ElementTree +from hashlib import md5 +from urllib.parse import urlencode + +import requests + +from beets.dbcore import AndQuery +from beets.dbcore.query import MatchQuery +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand + +__author__ = 'https://github.com/MrNuggelz' + + +def filter_to_be_removed(items, keys): + if len(items) > len(keys): + dont_remove = [] + for artist, album, title in keys: + for item in items: + if artist == item['artist'] and \ + album == item['album'] and \ + title == item['title']: + dont_remove.append(item) + return [item for item in items if item not in dont_remove] + else: + def to_be_removed(item): + for artist, album, title in keys: + if artist == item['artist'] and\ + album == item['album'] and\ + title == item['title']: + return False + return True + + return [item for item in items if to_be_removed(item)] + + +class SubsonicPlaylistPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add( + { + 'delete': False, + 'playlist_ids': [], + 'playlist_names': [], + 'username': '', + 'password': '' + } + ) + self.config['password'].redact = True + + def update_tags(self, playlist_dict, lib): + with lib.transaction(): + for query, playlist_tag in playlist_dict.items(): + query = AndQuery([MatchQuery("artist", query[0]), + MatchQuery("album", query[1]), + MatchQuery("title", query[2])]) + items = lib.items(query) + if not items: + self._log.warn("{} | track not found ({})", playlist_tag, + query) + continue + for item in items: + item.subsonic_playlist = playlist_tag + item.try_sync(write=True, move=False) + + def get_playlist(self, playlist_id): + xml = self.send('getPlaylist', {'id': playlist_id}).text + playlist = ElementTree.fromstring(xml)[0] + if playlist.attrib.get('code', '200') != '200': + alt_error = 'error getting playlist, but no error message found' + self._log.warn(playlist.attrib.get('message', alt_error)) + return + + name 
= playlist.attrib.get('name', 'undefined') + tracks = [(t.attrib['artist'], t.attrib['album'], t.attrib['title']) + for t in playlist] + return name, tracks + + def commands(self): + def build_playlist(lib, opts, args): + self.config.set_args(opts) + ids = self.config['playlist_ids'].as_str_seq() + if self.config['playlist_names'].as_str_seq(): + playlists = ElementTree.fromstring( + self.send('getPlaylists').text)[0] + if playlists.attrib.get('code', '200') != '200': + alt_error = 'error getting playlists,' \ + ' but no error message found' + self._log.warn( + playlists.attrib.get('message', alt_error)) + return + for name in self.config['playlist_names'].as_str_seq(): + for playlist in playlists: + if name == playlist.attrib['name']: + ids.append(playlist.attrib['id']) + + playlist_dict = self.get_playlists(ids) + + # delete old tags + if self.config['delete']: + existing = list(lib.items('subsonic_playlist:";"')) + to_be_removed = filter_to_be_removed( + existing, + playlist_dict.keys()) + for item in to_be_removed: + item['subsonic_playlist'] = '' + with lib.transaction(): + item.try_sync(write=True, move=False) + + self.update_tags(playlist_dict, lib) + + subsonicplaylist_cmds = Subcommand( + 'subsonicplaylist', help='import a subsonic playlist' + ) + subsonicplaylist_cmds.parser.add_option( + '-d', + '--delete', + action='store_true', + help='delete tag from items not in any playlist anymore', + ) + subsonicplaylist_cmds.func = build_playlist + return [subsonicplaylist_cmds] + + def generate_token(self): + salt = ''.join(random.choices(string.ascii_lowercase + string.digits)) + return md5( + (self.config['password'].get() + salt).encode()).hexdigest(), salt + + def send(self, endpoint, params=None): + if params is None: + params = {} + a, b = self.generate_token() + params['u'] = self.config['username'] + params['t'] = a + params['s'] = b + params['v'] = '1.12.0' + params['c'] = 'beets' + resp = requests.get('{}/rest/{}?{}'.format( + 
self.config['base_url'].get(), + endpoint, + urlencode(params)) + ) + return resp + + def get_playlists(self, ids): + output = {} + for playlist_id in ids: + name, tracks = self.get_playlist(playlist_id) + for track in tracks: + if track not in output: + output[track] = ';' + output[track] += name + ';' + return output diff --git a/libs/common/beetsplug/subsonicupdate.py b/libs/common/beetsplug/subsonicupdate.py new file mode 100644 index 00000000..9480bcb4 --- /dev/null +++ b/libs/common/beetsplug/subsonicupdate.py @@ -0,0 +1,144 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Updates Subsonic library on Beets import +Your Beets configuration file should contain +a "subsonic" section like the following: + subsonic: + url: https://mydomain.com:443/subsonic + user: username + pass: password + auth: token +For older Subsonic versions, token authentication +is not supported, use password instead: + subsonic: + url: https://mydomain.com:443/subsonic + user: username + pass: password + auth: pass +""" + +import hashlib +import random +import string + +import requests + +from binascii import hexlify +from beets import config +from beets.plugins import BeetsPlugin + +__author__ = 'https://github.com/maffo999' + + +class SubsonicUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + # Set default configuration values + config['subsonic'].add({ + 'user': 'admin', + 'pass': 'admin', + 'url': 'http://localhost:4040', + 'auth': 'token', + }) + config['subsonic']['pass'].redact = True + self.register_listener('import', self.start_scan) + + @staticmethod + def __create_token(): + """Create salt and token from given password. + + :return: The generated salt and hashed token + """ + password = config['subsonic']['pass'].as_str() + + # Pick the random sequence and salt the password + r = string.ascii_letters + string.digits + salt = "".join([random.choice(r) for _ in range(6)]) + salted_password = password + salt + token = hashlib.md5(salted_password.encode('utf-8')).hexdigest() + + # Put together the payload of the request to the server and the URL + return salt, token + + @staticmethod + def __format_url(endpoint): + """Get the Subsonic URL to trigger the given endpoint. + Uses either the url config option or the deprecated host, port, + and context_path config options together. 
+ + :return: Endpoint for updating Subsonic + """ + + url = config['subsonic']['url'].as_str() + if url and url.endswith('/'): + url = url[:-1] + + # @deprecated("Use url config option instead") + if not url: + host = config['subsonic']['host'].as_str() + port = config['subsonic']['port'].get(int) + context_path = config['subsonic']['contextpath'].as_str() + if context_path == '/': + context_path = '' + url = f"http://{host}:{port}{context_path}" + + return url + f'/rest/{endpoint}' + + def start_scan(self): + user = config['subsonic']['user'].as_str() + auth = config['subsonic']['auth'].as_str() + url = self.__format_url("startScan") + self._log.debug('URL is {0}', url) + self._log.debug('auth type is {0}', config['subsonic']['auth']) + + if auth == "token": + salt, token = self.__create_token() + payload = { + 'u': user, + 't': token, + 's': salt, + 'v': '1.13.0', # Subsonic 5.3 and newer + 'c': 'beets', + 'f': 'json' + } + elif auth == "password": + password = config['subsonic']['pass'].as_str() + encpass = hexlify(password.encode()).decode() + payload = { + 'u': user, + 'p': f'enc:{encpass}', + 'v': '1.12.0', + 'c': 'beets', + 'f': 'json' + } + else: + return + try: + response = requests.get(url, params=payload) + json = response.json() + + if response.status_code == 200 and \ + json['subsonic-response']['status'] == "ok": + count = json['subsonic-response']['scanStatus']['count'] + self._log.info( + f'Updating Subsonic; scanning {count} tracks') + elif response.status_code == 200 and \ + json['subsonic-response']['status'] == "failed": + error_message = json['subsonic-response']['error']['message'] + self._log.error(f'Error: {error_message}') + else: + self._log.error('Error: {0}', json) + except Exception as error: + self._log.error(f'Error: {error}') diff --git a/libs/common/beetsplug/the.py b/libs/common/beetsplug/the.py index cfb583ce..e6626d2b 100644 --- a/libs/common/beetsplug/the.py +++ b/libs/common/beetsplug/the.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 
-*- # This file is part of beets. # Copyright 2016, Blemjhoo Tezoulbr . # @@ -15,7 +14,6 @@ """Moves patterns in path formats (suitable for moving articles).""" -from __future__ import division, absolute_import, print_function import re from beets.plugins import BeetsPlugin @@ -23,9 +21,9 @@ from beets.plugins import BeetsPlugin __author__ = 'baobab@heresiarch.info' __version__ = '1.1' -PATTERN_THE = u'^[the]{3}\s' -PATTERN_A = u'^[a][n]?\s' -FORMAT = u'{0}, {1}' +PATTERN_THE = '^the\\s' +PATTERN_A = '^[a][n]?\\s' +FORMAT = '{0}, {1}' class ThePlugin(BeetsPlugin): @@ -33,14 +31,14 @@ class ThePlugin(BeetsPlugin): patterns = [] def __init__(self): - super(ThePlugin, self).__init__() + super().__init__() self.template_funcs['the'] = self.the_template_func self.config.add({ 'the': True, 'a': True, - 'format': u'{0}, {1}', + 'format': '{0}, {1}', 'strip': False, 'patterns': [], }) @@ -51,17 +49,17 @@ class ThePlugin(BeetsPlugin): try: re.compile(p) except re.error: - self._log.error(u'invalid pattern: {0}', p) + self._log.error('invalid pattern: {0}', p) else: if not (p.startswith('^') or p.endswith('$')): - self._log.warning(u'warning: \"{0}\" will not ' - u'match string start/end', p) + self._log.warning('warning: \"{0}\" will not ' + 'match string start/end', p) if self.config['a']: self.patterns = [PATTERN_A] + self.patterns if self.config['the']: self.patterns = [PATTERN_THE] + self.patterns if not self.patterns: - self._log.warning(u'no patterns defined!') + self._log.warning('no patterns defined!') def unthe(self, text, pattern): """Moves pattern in the path format string or strips it @@ -84,7 +82,7 @@ class ThePlugin(BeetsPlugin): fmt = self.config['format'].as_str() return fmt.format(r, t.strip()).strip() else: - return u'' + return '' def the_template_func(self, text): if not self.patterns: @@ -93,8 +91,8 @@ class ThePlugin(BeetsPlugin): for p in self.patterns: r = self.unthe(text, p) if r != text: + self._log.debug('\"{0}\" -> \"{1}\"', text, r) break - 
self._log.debug(u'\"{0}\" -> \"{1}\"', text, r) return r else: - return u'' + return '' diff --git a/libs/common/beetsplug/thumbnails.py b/libs/common/beetsplug/thumbnails.py index 04845e88..6bd9cbac 100644 --- a/libs/common/beetsplug/thumbnails.py +++ b/libs/common/beetsplug/thumbnails.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Bruno Cauet # @@ -19,7 +18,6 @@ This plugin is POSIX-only. Spec: standards.freedesktop.org/thumbnail-spec/latest/index.html """ -from __future__ import division, absolute_import, print_function from hashlib import md5 import os @@ -35,7 +33,6 @@ from beets.plugins import BeetsPlugin from beets.ui import Subcommand, decargs from beets import util from beets.util.artresizer import ArtResizer, get_im_version, get_pil_version -import six BASE_DIR = os.path.join(BaseDirectory.xdg_cache_home, "thumbnails") @@ -45,7 +42,7 @@ LARGE_DIR = util.bytestring_path(os.path.join(BASE_DIR, "large")) class ThumbnailsPlugin(BeetsPlugin): def __init__(self): - super(ThumbnailsPlugin, self).__init__() + super().__init__() self.config.add({ 'auto': True, 'force': False, @@ -58,15 +55,15 @@ class ThumbnailsPlugin(BeetsPlugin): def commands(self): thumbnails_command = Subcommand("thumbnails", - help=u"Create album thumbnails") + help="Create album thumbnails") thumbnails_command.parser.add_option( - u'-f', u'--force', + '-f', '--force', dest='force', action='store_true', default=False, - help=u'force regeneration of thumbnails deemed fine (existing & ' - u'recent enough)') + help='force regeneration of thumbnails deemed fine (existing & ' + 'recent enough)') thumbnails_command.parser.add_option( - u'--dolphin', dest='dolphin', action='store_true', default=False, - help=u"create Dolphin-compatible thumbnail information (for KDE)") + '--dolphin', dest='dolphin', action='store_true', default=False, + help="create Dolphin-compatible thumbnail information (for KDE)") thumbnails_command.func = self.process_query return 
[thumbnails_command] @@ -85,8 +82,8 @@ class ThumbnailsPlugin(BeetsPlugin): - detect whether we'll use GIO or Python to get URIs """ if not ArtResizer.shared.local: - self._log.warning(u"No local image resizing capabilities, " - u"cannot generate thumbnails") + self._log.warning("No local image resizing capabilities, " + "cannot generate thumbnails") return False for dir in (NORMAL_DIR, LARGE_DIR): @@ -100,12 +97,12 @@ class ThumbnailsPlugin(BeetsPlugin): assert get_pil_version() # since we're local self.write_metadata = write_metadata_pil tool = "PIL" - self._log.debug(u"using {0} to write metadata", tool) + self._log.debug("using {0} to write metadata", tool) uri_getter = GioURI() if not uri_getter.available: uri_getter = PathlibURI() - self._log.debug(u"using {0.name} to compute URIs", uri_getter) + self._log.debug("using {0.name} to compute URIs", uri_getter) self.get_uri = uri_getter.uri return True @@ -113,9 +110,9 @@ class ThumbnailsPlugin(BeetsPlugin): def process_album(self, album): """Produce thumbnails for the album folder. 
""" - self._log.debug(u'generating thumbnail for {0}', album) + self._log.debug('generating thumbnail for {0}', album) if not album.artpath: - self._log.info(u'album {0} has no art', album) + self._log.info('album {0} has no art', album) return if self.config['dolphin']: @@ -123,7 +120,7 @@ class ThumbnailsPlugin(BeetsPlugin): size = ArtResizer.shared.get_size(album.artpath) if not size: - self._log.warning(u'problem getting the picture size for {0}', + self._log.warning('problem getting the picture size for {0}', album.artpath) return @@ -133,9 +130,9 @@ class ThumbnailsPlugin(BeetsPlugin): wrote &= self.make_cover_thumbnail(album, 128, NORMAL_DIR) if wrote: - self._log.info(u'wrote thumbnail for {0}', album) + self._log.info('wrote thumbnail for {0}', album) else: - self._log.info(u'nothing to do for {0}', album) + self._log.info('nothing to do for {0}', album) def make_cover_thumbnail(self, album, size, target_dir): """Make a thumbnail of given size for `album` and put it in @@ -146,11 +143,11 @@ class ThumbnailsPlugin(BeetsPlugin): if os.path.exists(target) and \ os.stat(target).st_mtime > os.stat(album.artpath).st_mtime: if self.config['force']: - self._log.debug(u"found a suitable {1}x{1} thumbnail for {0}, " - u"forcing regeneration", album, size) + self._log.debug("found a suitable {1}x{1} thumbnail for {0}, " + "forcing regeneration", album, size) else: - self._log.debug(u"{1}x{1} thumbnail for {0} exists and is " - u"recent enough", album, size) + self._log.debug("{1}x{1} thumbnail for {0} exists and is " + "recent enough", album, size) return False resized = ArtResizer.shared.resize(size, album.artpath, util.syspath(target)) @@ -160,23 +157,23 @@ class ThumbnailsPlugin(BeetsPlugin): def thumbnail_file_name(self, path): """Compute the thumbnail file name - See http://standards.freedesktop.org/thumbnail-spec/latest/x227.html + See https://standards.freedesktop.org/thumbnail-spec/latest/x227.html """ uri = self.get_uri(path) hash = 
md5(uri.encode('utf-8')).hexdigest() - return util.bytestring_path("{0}.png".format(hash)) + return util.bytestring_path(f"{hash}.png") def add_tags(self, album, image_path): """Write required metadata to the thumbnail - See http://standards.freedesktop.org/thumbnail-spec/latest/x142.html + See https://standards.freedesktop.org/thumbnail-spec/latest/x142.html """ mtime = os.stat(album.artpath).st_mtime metadata = {"Thumb::URI": self.get_uri(album.artpath), - "Thumb::MTime": six.text_type(mtime)} + "Thumb::MTime": str(mtime)} try: self.write_metadata(image_path, metadata) except Exception: - self._log.exception(u"could not write metadata to {0}", + self._log.exception("could not write metadata to {0}", util.displayable_path(image_path)) def make_dolphin_cover_thumbnail(self, album): @@ -186,9 +183,9 @@ class ThumbnailsPlugin(BeetsPlugin): artfile = os.path.split(album.artpath)[1] with open(outfilename, 'w') as f: f.write('[Desktop Entry]\n') - f.write('Icon=./{0}'.format(artfile.decode('utf-8'))) + f.write('Icon=./{}'.format(artfile.decode('utf-8'))) f.close() - self._log.debug(u"Wrote file {0}", util.displayable_path(outfilename)) + self._log.debug("Wrote file {0}", util.displayable_path(outfilename)) def write_metadata_im(file, metadata): @@ -211,7 +208,7 @@ def write_metadata_pil(file, metadata): return True -class URIGetter(object): +class URIGetter: available = False name = "Abstract base" @@ -224,7 +221,7 @@ class PathlibURI(URIGetter): name = "Python Pathlib" def uri(self, path): - return PurePosixPath(path).as_uri() + return PurePosixPath(util.py3_path(path)).as_uri() def copy_c_string(c_string): @@ -269,7 +266,7 @@ class GioURI(URIGetter): def uri(self, path): g_file_ptr = self.libgio.g_file_new_for_path(path) if not g_file_ptr: - raise RuntimeError(u"No gfile pointer received for {0}".format( + raise RuntimeError("No gfile pointer received for {}".format( util.displayable_path(path))) try: @@ -278,8 +275,8 @@ class GioURI(URIGetter): 
self.libgio.g_object_unref(g_file_ptr) if not uri_ptr: self.libgio.g_free(uri_ptr) - raise RuntimeError(u"No URI received from the gfile pointer for " - u"{0}".format(util.displayable_path(path))) + raise RuntimeError("No URI received from the gfile pointer for " + "{}".format(util.displayable_path(path))) try: uri = copy_c_string(uri_ptr) @@ -290,5 +287,5 @@ class GioURI(URIGetter): return uri.decode(util._fsencoding()) except UnicodeDecodeError: raise RuntimeError( - "Could not decode filename from GIO: {!r}".format(uri) + f"Could not decode filename from GIO: {uri!r}" ) diff --git a/libs/common/beetsplug/types.py b/libs/common/beetsplug/types.py index 0c078881..930d5e86 100644 --- a/libs/common/beetsplug/types.py +++ b/libs/common/beetsplug/types.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Thomas Scholtes. # @@ -13,11 +12,10 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets.dbcore import types -from beets.util.confit import ConfigValueError +from confuse import ConfigValueError from beets import library @@ -47,6 +45,6 @@ class TypesPlugin(BeetsPlugin): mytypes[key] = library.DateType() else: raise ConfigValueError( - u"unknown type '{0}' for the '{1}' field" + "unknown type '{}' for the '{}' field" .format(value, key)) return mytypes diff --git a/libs/common/beetsplug/unimported.py b/libs/common/beetsplug/unimported.py new file mode 100644 index 00000000..7714ec83 --- /dev/null +++ b/libs/common/beetsplug/unimported.py @@ -0,0 +1,68 @@ +# This file is part of beets. 
+# Copyright 2019, Joris Jensen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +""" +List all files in the library folder which are not listed in the + beets library database, including art files +""" + +import os + +from beets import util +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, print_ + +__author__ = 'https://github.com/MrNuggelz' + + +class Unimported(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add( + { + 'ignore_extensions': [] + } + ) + + def commands(self): + def print_unimported(lib, opts, args): + ignore_exts = [ + ('.' 
+ x).encode() + for x in self.config["ignore_extensions"].as_str_seq() + ] + ignore_dirs = [ + os.path.join(lib.directory, x.encode()) + for x in self.config["ignore_subdirectories"].as_str_seq() + ] + in_folder = { + os.path.join(r, file) + for r, d, f in os.walk(lib.directory) + for file in f + if not any( + [file.endswith(ext) for ext in ignore_exts] + + [r in ignore_dirs] + ) + } + in_library = {x.path for x in lib.items()} + art_files = {x.artpath for x in lib.albums()} + for f in in_folder - in_library - art_files: + print_(util.displayable_path(f)) + + unimported = Subcommand( + 'unimported', + help='list all files in the library folder which are not listed' + ' in the beets library database') + unimported.func = print_unimported + return [unimported] diff --git a/libs/common/beetsplug/web/__init__.py b/libs/common/beetsplug/web/__init__.py index 3cf43ed5..240126e9 100644 --- a/libs/common/beetsplug/web/__init__.py +++ b/libs/common/beetsplug/web/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,14 +13,13 @@ # included in all copies or substantial portions of the Software. 
"""A Web interface to beets.""" -from __future__ import division, absolute_import, print_function from beets.plugins import BeetsPlugin from beets import ui from beets import util import beets.library import flask -from flask import g +from flask import g, jsonify from werkzeug.routing import BaseConverter, PathConverter import os from unidecode import unidecode @@ -59,7 +57,10 @@ def _rep(obj, expand=False): return out elif isinstance(obj, beets.library.Album): - del out['artpath'] + if app.config.get('INCLUDE_PATHS', False): + out['artpath'] = util.displayable_path(out['artpath']) + else: + del out['artpath'] if expand: out['items'] = [_rep(item) for item in obj.items()] return out @@ -91,7 +92,20 @@ def is_expand(): return flask.request.args.get('expand') is not None -def resource(name): +def is_delete(): + """Returns whether the current delete request should remove the selected + files. + """ + + return flask.request.args.get('delete') is not None + + +def get_method(): + """Returns the HTTP method of the current request.""" + return flask.request.method + + +def resource(name, patchable=False): """Decorates a function to handle RESTful HTTP requests for a resource. 
""" def make_responder(retriever): @@ -99,34 +113,98 @@ def resource(name): entities = [retriever(id) for id in ids] entities = [entity for entity in entities if entity] - if len(entities) == 1: - return flask.jsonify(_rep(entities[0], expand=is_expand())) - elif entities: - return app.response_class( - json_generator(entities, root=name), - mimetype='application/json' - ) + if get_method() == "DELETE": + + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.remove(delete=is_delete()) + + return flask.make_response(jsonify({'deleted': True}), 200) + + elif get_method() == "PATCH" and patchable: + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.update(flask.request.get_json()) + entity.try_sync(True, False) # write, don't move + + if len(entities) == 1: + return flask.jsonify(_rep(entities[0], expand=is_expand())) + elif entities: + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + + elif get_method() == "GET": + if len(entities) == 1: + return flask.jsonify(_rep(entities[0], expand=is_expand())) + elif entities: + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + else: + return flask.abort(404) + else: - return flask.abort(404) - responder.__name__ = 'get_{0}'.format(name) + return flask.abort(405) + + responder.__name__ = f'get_{name}' + return responder return make_responder -def resource_query(name): +def resource_query(name, patchable=False): """Decorates a function to handle RESTful HTTP queries for resources. 
""" def make_responder(query_func): def responder(queries): - return app.response_class( - json_generator( - query_func(queries), - root='results', expand=is_expand() - ), - mimetype='application/json' - ) - responder.__name__ = 'query_{0}'.format(name) + entities = query_func(queries) + + if get_method() == "DELETE": + + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.remove(delete=is_delete()) + + return flask.make_response(jsonify({'deleted': True}), 200) + + elif get_method() == "PATCH" and patchable: + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.update(flask.request.get_json()) + entity.try_sync(True, False) # write, don't move + + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + + elif get_method() == "GET": + return app.response_class( + json_generator( + entities, + root='results', expand=is_expand() + ), + mimetype='application/json' + ) + + else: + return flask.abort(405) + + responder.__name__ = f'query_{name}' + return responder + return make_responder @@ -140,7 +218,7 @@ def resource_list(name): json_generator(list_all(), root=name, expand=is_expand()), mimetype='application/json' ) - responder.__name__ = 'all_{0}'.format(name) + responder.__name__ = f'all_{name}' return responder return make_responder @@ -150,7 +228,7 @@ def _get_unique_table_field_values(model, field, sort_field): if field not in model.all_keys() or sort_field not in model.all_keys(): raise KeyError with g.lib.transaction() as tx: - rows = tx.query('SELECT DISTINCT "{0}" FROM "{1}" ORDER BY "{2}"' + rows = tx.query('SELECT DISTINCT "{}" FROM "{}" ORDER BY "{}"' .format(field, model._table, sort_field)) return [row[0] for row in rows] @@ -169,7 +247,7 @@ class IdListConverter(BaseConverter): return ids def to_url(self, value): - return ','.join(value) + return ','.join(str(v) for v in value) class 
QueryConverter(PathConverter): @@ -177,10 +255,13 @@ class QueryConverter(PathConverter): """ def to_python(self, value): - return value.split('/') + queries = value.split('/') + """Do not do path substitution on regex value tests""" + return [query if '::' in query else query.replace('\\', os.sep) + for query in queries] def to_url(self, value): - return ','.join(value) + return ','.join([v.replace(os.sep, '\\') for v in value]) class EverythingConverter(PathConverter): @@ -202,8 +283,8 @@ def before_request(): # Items. -@app.route('/item/') -@resource('items') +@app.route('/item/', methods=["GET", "DELETE", "PATCH"]) +@resource('items', patchable=True) def get_item(id): return g.lib.get_item(id) @@ -249,8 +330,8 @@ def item_file(item_id): return response -@app.route('/item/query/') -@resource_query('items') +@app.route('/item/query/', methods=["GET", "DELETE", "PATCH"]) +@resource_query('items', patchable=True) def item_query(queries): return g.lib.items(queries) @@ -278,7 +359,7 @@ def item_unique_field_values(key): # Albums. 
-@app.route('/album/') +@app.route('/album/', methods=["GET", "DELETE"]) @resource('albums') def get_album(id): return g.lib.get_album(id) @@ -291,7 +372,7 @@ def all_albums(): return g.lib.albums() -@app.route('/album/query/') +@app.route('/album/query/', methods=["GET", "DELETE"]) @resource_query('albums') def album_query(queries): return g.lib.albums(queries) @@ -351,20 +432,21 @@ def home(): class WebPlugin(BeetsPlugin): def __init__(self): - super(WebPlugin, self).__init__() + super().__init__() self.config.add({ - 'host': u'127.0.0.1', + 'host': '127.0.0.1', 'port': 8337, 'cors': '', 'cors_supports_credentials': False, 'reverse_proxy': False, 'include_paths': False, + 'readonly': True, }) def commands(self): - cmd = ui.Subcommand('web', help=u'start a Web interface') - cmd.parser.add_option(u'-d', u'--debug', action='store_true', - default=False, help=u'debug mode') + cmd = ui.Subcommand('web', help='start a Web interface') + cmd.parser.add_option('-d', '--debug', action='store_true', + default=False, help='debug mode') def func(lib, opts, args): args = ui.decargs(args) @@ -378,12 +460,13 @@ class WebPlugin(BeetsPlugin): app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False app.config['INCLUDE_PATHS'] = self.config['include_paths'] + app.config['READONLY'] = self.config['readonly'] # Enable CORS if required. 
if self.config['cors']: - self._log.info(u'Enabling CORS with origin: {0}', + self._log.info('Enabling CORS with origin: {0}', self.config['cors']) - from flask.ext.cors import CORS + from flask_cors import CORS app.config['CORS_ALLOW_HEADERS'] = "Content-Type" app.config['CORS_RESOURCES'] = { r"/*": {"origins": self.config['cors'].get(str)} @@ -407,7 +490,7 @@ class WebPlugin(BeetsPlugin): return [cmd] -class ReverseProxied(object): +class ReverseProxied: '''Wrap the application in this middleware and configure the front-end server to add these headers, to let you quietly bind this to a URL other than / and to an HTTP scheme that is diff --git a/libs/common/beetsplug/web/static/beets.js b/libs/common/beetsplug/web/static/beets.js index 51985c18..97af7011 100644 --- a/libs/common/beetsplug/web/static/beets.js +++ b/libs/common/beetsplug/web/static/beets.js @@ -129,7 +129,7 @@ $.fn.player = function(debug) { // Simple selection disable for jQuery. // Cut-and-paste from: -// http://stackoverflow.com/questions/2700000 +// https://stackoverflow.com/questions/2700000 $.fn.disableSelection = function() { $(this).attr('unselectable', 'on') .css('-moz-user-select', 'none') diff --git a/libs/common/beetsplug/zero.py b/libs/common/beetsplug/zero.py index 022c2c72..f05b1b5a 100644 --- a/libs/common/beetsplug/zero.py +++ b/libs/common/beetsplug/zero.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Blemjhoo Tezoulbr . 
# @@ -15,23 +14,21 @@ """ Clears tag fields in media files.""" -from __future__ import division, absolute_import, print_function -import six import re from beets.plugins import BeetsPlugin -from beets.mediafile import MediaFile +from mediafile import MediaFile from beets.importer import action from beets.ui import Subcommand, decargs, input_yn -from beets.util import confit +import confuse __author__ = 'baobab@heresiarch.info' class ZeroPlugin(BeetsPlugin): def __init__(self): - super(ZeroPlugin, self).__init__() + super().__init__() self.register_listener('write', self.write_event) self.register_listener('import_task_choice', @@ -56,7 +53,7 @@ class ZeroPlugin(BeetsPlugin): """ if self.config['fields'] and self.config['keep_fields']: self._log.warning( - u'cannot blacklist and whitelist at the same time' + 'cannot blacklist and whitelist at the same time' ) # Blacklist mode. elif self.config['fields']: @@ -75,7 +72,7 @@ class ZeroPlugin(BeetsPlugin): def zero_fields(lib, opts, args): if not decargs(args) and not input_yn( - u"Remove fields for all items? (Y/n)", + "Remove fields for all items? (Y/n)", True): return for item in lib.items(decargs(args)): @@ -89,22 +86,22 @@ class ZeroPlugin(BeetsPlugin): Do some sanity checks then compile the regexes. 
""" if field not in MediaFile.fields(): - self._log.error(u'invalid field: {0}', field) + self._log.error('invalid field: {0}', field) elif field in ('id', 'path', 'album_id'): - self._log.warning(u'field \'{0}\' ignored, zeroing ' - u'it would be dangerous', field) + self._log.warning('field \'{0}\' ignored, zeroing ' + 'it would be dangerous', field) else: try: for pattern in self.config[field].as_str_seq(): prog = re.compile(pattern, re.IGNORECASE) self.fields_to_progs.setdefault(field, []).append(prog) - except confit.NotFoundError: + except confuse.NotFoundError: # Matches everything self.fields_to_progs[field] = [] def import_task_choice_event(self, session, task): if task.choice_flag == action.ASIS and not self.warned: - self._log.warning(u'cannot zero in \"as-is\" mode') + self._log.warning('cannot zero in \"as-is\" mode') self.warned = True # TODO request write in as-is mode @@ -122,7 +119,7 @@ class ZeroPlugin(BeetsPlugin): fields_set = False if not self.fields_to_progs: - self._log.warning(u'no fields, nothing to do') + self._log.warning('no fields, nothing to do') return False for field, progs in self.fields_to_progs.items(): @@ -135,7 +132,7 @@ class ZeroPlugin(BeetsPlugin): if match: fields_set = True - self._log.debug(u'{0}: {1} -> None', field, value) + self._log.debug('{0}: {1} -> None', field, value) tags[field] = None if self.config['update_database']: item[field] = None @@ -158,6 +155,6 @@ def _match_progs(value, progs): if not progs: return True for prog in progs: - if prog.search(six.text_type(value)): + if prog.search(str(value)): return True return False diff --git a/libs/common/bin/beet.exe b/libs/common/bin/beet.exe index e91e175e..a4e9eb61 100644 Binary files a/libs/common/bin/beet.exe and b/libs/common/bin/beet.exe differ diff --git a/libs/common/bin/chardetect.exe b/libs/common/bin/chardetect.exe index 17242a80..cbee2d0a 100644 Binary files a/libs/common/bin/chardetect.exe and b/libs/common/bin/chardetect.exe differ diff --git 
a/libs/common/bin/easy_install-3.7.exe b/libs/common/bin/easy_install-3.7.exe index ba897f33..ca60fc91 100644 Binary files a/libs/common/bin/easy_install-3.7.exe and b/libs/common/bin/easy_install-3.7.exe differ diff --git a/libs/common/bin/easy_install.exe b/libs/common/bin/easy_install.exe index ba897f33..ca60fc91 100644 Binary files a/libs/common/bin/easy_install.exe and b/libs/common/bin/easy_install.exe differ diff --git a/libs/common/bin/guessit.exe b/libs/common/bin/guessit.exe index 099b3bfb..3f453ac0 100644 Binary files a/libs/common/bin/guessit.exe and b/libs/common/bin/guessit.exe differ diff --git a/libs/common/bin/mid3cp b/libs/common/bin/mid3cp deleted file mode 100644 index 8a773e56..00000000 --- a/libs/common/bin/mid3cp +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -import sys - -from mutagen._tools.mid3cp import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/mid3cp.exe b/libs/common/bin/mid3cp.exe new file mode 100644 index 00000000..82640c70 Binary files /dev/null and b/libs/common/bin/mid3cp.exe differ diff --git a/libs/common/bin/mid3iconv b/libs/common/bin/mid3iconv deleted file mode 100644 index 332f6b70..00000000 --- a/libs/common/bin/mid3iconv +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
- -import sys - -from mutagen._tools.mid3iconv import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/mid3iconv.exe b/libs/common/bin/mid3iconv.exe new file mode 100644 index 00000000..95fc1797 Binary files /dev/null and b/libs/common/bin/mid3iconv.exe differ diff --git a/libs/common/bin/mid3v2 b/libs/common/bin/mid3v2 deleted file mode 100644 index 1bf2d13d..00000000 --- a/libs/common/bin/mid3v2 +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -import sys - -from mutagen._tools.mid3v2 import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/mid3v2.exe b/libs/common/bin/mid3v2.exe new file mode 100644 index 00000000..2c7a099a Binary files /dev/null and b/libs/common/bin/mid3v2.exe differ diff --git a/libs/common/bin/moggsplit b/libs/common/bin/moggsplit deleted file mode 100644 index f43d1360..00000000 --- a/libs/common/bin/moggsplit +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
- -import sys - -from mutagen._tools.moggsplit import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/moggsplit.exe b/libs/common/bin/moggsplit.exe new file mode 100644 index 00000000..1160cbce Binary files /dev/null and b/libs/common/bin/moggsplit.exe differ diff --git a/libs/common/bin/mutagen-inspect b/libs/common/bin/mutagen-inspect deleted file mode 100644 index 746b414e..00000000 --- a/libs/common/bin/mutagen-inspect +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -import sys - -from mutagen._tools.mutagen_inspect import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/mutagen-inspect.exe b/libs/common/bin/mutagen-inspect.exe new file mode 100644 index 00000000..4f587d6b Binary files /dev/null and b/libs/common/bin/mutagen-inspect.exe differ diff --git a/libs/common/bin/mutagen-pony b/libs/common/bin/mutagen-pony deleted file mode 100644 index a289a988..00000000 --- a/libs/common/bin/mutagen-pony +++ /dev/null @@ -1,16 +0,0 @@ -#!C:\Python\3.7\python.exe -# -*- coding: utf-8 -*- -# Copyright 2016 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
- -import sys - -from mutagen._tools.mutagen_pony import entry_point - - -if __name__ == "__main__": - sys.exit(entry_point()) diff --git a/libs/common/bin/mutagen-pony.exe b/libs/common/bin/mutagen-pony.exe new file mode 100644 index 00000000..eacd1950 Binary files /dev/null and b/libs/common/bin/mutagen-pony.exe differ diff --git a/libs/common/bin/pbr.exe b/libs/common/bin/pbr.exe index e7eab92c..3c2f8c28 100644 Binary files a/libs/common/bin/pbr.exe and b/libs/common/bin/pbr.exe differ diff --git a/libs/common/bin/srt.exe b/libs/common/bin/srt.exe index 90e6494d..a7dae560 100644 Binary files a/libs/common/bin/srt.exe and b/libs/common/bin/srt.exe differ diff --git a/libs/common/bin/subliminal.exe b/libs/common/bin/subliminal.exe index 280ce315..114c64d9 100644 Binary files a/libs/common/bin/subliminal.exe and b/libs/common/bin/subliminal.exe differ diff --git a/libs/common/bin/unidecode.exe b/libs/common/bin/unidecode.exe index 0880f1b8..9b1c0df9 100644 Binary files a/libs/common/bin/unidecode.exe and b/libs/common/bin/unidecode.exe differ diff --git a/libs/common/bs4/__init__.py b/libs/common/bs4/__init__.py index 797a6826..b3c9feb8 100644 --- a/libs/common/bs4/__init__.py +++ b/libs/common/bs4/__init__.py @@ -1,6 +1,5 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" +"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend". + http://www.crummy.com/software/BeautifulSoup/ Beautiful Soup uses a pluggable XML or HTML parser to parse a @@ -8,32 +7,38 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a provides methods and Pythonic idioms that make it easy to navigate, search, and modify the parse tree. -Beautiful Soup works with Python 2.7 and up. It works better if lxml +Beautiful Soup works with Python 3.5 and up. It works better if lxml and/or html5lib is installed. 
For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/bs4/doc/ - +documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.6.3" -__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson" +__version__ = "4.11.1" +__copyright__ = "Copyright (c) 2004-2022 Leonard Richardson" +# Use of this source code is governed by the MIT license. __license__ = "MIT" __all__ = ['BeautifulSoup'] +from collections import Counter import os import re import sys import traceback import warnings -from .builder import builder_registry, ParserRejectedMarkup +# The very first thing we do is give a useful error if someone is +# running this code under Python 2. +if sys.version_info.major < 3: + raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') + +from .builder import ( + builder_registry, + ParserRejectedMarkup, + XMLParsedAsHTMLWarning, +) from .dammit import UnicodeDammit from .element import ( CData, @@ -44,28 +49,49 @@ from .element import ( NavigableString, PageElement, ProcessingInstruction, + PYTHON_SPECIFIC_ENCODINGS, ResultSet, + Script, + Stylesheet, SoupStrainer, Tag, + TemplateString, ) -# The very first thing we do is give a useful error if someone is -# running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' - -class BeautifulSoup(Tag): +# Define some custom warnings. 
+class GuessedAtParserWarning(UserWarning): + """The warning issued when BeautifulSoup has to guess what parser to + use -- probably because no parser was specified in the constructor. """ - This class defines the basic interface called by the tree builders. - These methods will be called by the parser: - reset() - feed(markup) +class MarkupResemblesLocatorWarning(UserWarning): + """The warning issued when BeautifulSoup is given 'markup' that + actually looks like a resource locator -- a URL or a path to a file + on disk. + """ + + +class BeautifulSoup(Tag): + """A data structure representing a parsed HTML or XML document. + + Most of the methods you'll call on a BeautifulSoup object are inherited from + PageElement or Tag. + + Internally, this class defines the basic interface called by the + tree builders when converting an HTML/XML document into a data + structure. The interface abstracts away the differences between + parsers. To write a new tree builder, you'll need to understand + these methods as a whole. + + These methods will be called by the BeautifulSoup constructor: + * reset() + * feed(markup) The tree builder may call these methods from its feed() implementation: - handle_starttag(name, attrs) # See note about return value - handle_endtag(name) - handle_data(data) # Appends to the current data node - endData(containerClass=NavigableString) # Ends the current data node + * handle_starttag(name, attrs) # See note about return value + * handle_endtag(name) + * handle_data(data) # Appends to the current data node + * endData(containerClass) # Ends the current data node No matter how complicated the underlying parser is, you should be able to build a tree using 'start tag' events, 'end tag' events, @@ -75,56 +101,77 @@ class BeautifulSoup(Tag): like HTML's
tag), call handle_starttag and then handle_endtag. """ + + # Since BeautifulSoup subclasses Tag, it's possible to treat it as + # a Tag with a .name. This name makes it clear the BeautifulSoup + # object isn't a real markup tag. ROOT_TAG_NAME = '[document]' # If the end-user gives no indication which tree builder they # want, look for one with these features. DEFAULT_BUILDER_FEATURES = ['html', 'fast'] + # A string containing all ASCII whitespace characters, used in + # endData() to detect data chunks that seem 'empty'. ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" - + def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, - **kwargs): + element_classes=None, **kwargs): """Constructor. :param markup: A string or a file-like object representing - markup to be parsed. + markup to be parsed. - :param features: Desirable features of the parser to be used. This - may be the name of a specific parser ("lxml", "lxml-xml", - "html.parser", or "html5lib") or it may be the type of markup - to be used ("html", "html5", "xml"). It's recommended that you - name a specific parser, so that Beautiful Soup gives you the - same results across platforms and virtual environments. + :param features: Desirable features of the parser to be + used. 
This may be the name of a specific parser ("lxml", + "lxml-xml", "html.parser", or "html5lib") or it may be the + type of markup to be used ("html", "html5", "xml"). It's + recommended that you name a specific parser, so that + Beautiful Soup gives you the same results across platforms + and virtual environments. - :param builder: A specific TreeBuilder to use instead of looking one - up based on `features`. You shouldn't need to use this. + :param builder: A TreeBuilder subclass to instantiate (or + instance to use) instead of looking one up based on + `features`. You only need to use this if you've implemented a + custom TreeBuilder. :param parse_only: A SoupStrainer. Only parts of the document - matching the SoupStrainer will be considered. This is useful - when parsing part of a document that would otherwise be too - large to fit into memory. + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. :param from_encoding: A string indicating the encoding of the - document to be parsed. Pass this in if Beautiful Soup is - guessing wrongly about the document's encoding. + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. :param exclude_encodings: A list of strings indicating - encodings known to be wrong. Pass this in if you don't know - the document's encoding but you know Beautiful Soup's guess is - wrong. + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. + + :param element_classes: A dictionary mapping BeautifulSoup + classes like Tag and NavigableString, to other classes you'd + like to be instantiated instead as the parse tree is + built. This is useful for subclassing Tag or NavigableString + to modify default behavior. 
:param kwargs: For backwards compatibility purposes, the - constructor accepts certain keyword arguments used in - Beautiful Soup 3. None of these arguments do anything in - Beautiful Soup 4 and there's no need to actually pass keyword - arguments into the constructor. + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4; they will result in a warning and then be + ignored. + + Apart from this, any keyword arguments passed into the + BeautifulSoup constructor are propagated to the TreeBuilder + constructor. This makes it possible to configure a + TreeBuilder by passing in arguments, not just by saying which + one to use. """ - if 'convertEntities' in kwargs: + del kwargs['convertEntities'] warnings.warn( "BS4 does not respect the convertEntities argument to the " "BeautifulSoup constructor. Entities are always converted " @@ -163,10 +210,10 @@ class BeautifulSoup(Tag): if old_name in kwargs: warnings.warn( 'The "%s" argument to the BeautifulSoup constructor ' - 'has been renamed to "%s."' % (old_name, new_name)) - value = kwargs[old_name] - del kwargs[old_name] - return value + 'has been renamed to "%s."' % (old_name, new_name), + DeprecationWarning + ) + return kwargs.pop(old_name) return None parse_only = parse_only or deprecated_argument( @@ -179,13 +226,19 @@ class BeautifulSoup(Tag): warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") from_encoding = None - if len(kwargs) > 0: - arg = list(kwargs.keys()).pop() - raise TypeError( - "__init__() got an unexpected keyword argument '%s'" % arg) + self.element_classes = element_classes or dict() - if builder is None: - original_features = features + # We need this information to track whether or not the builder + # was specified well enough that we can omit the 'you need to + # specify a parser' warning. 
+ original_builder = builder + original_features = features + + if isinstance(builder, type): + # A builder class was passed in; it needs to be instantiated. + builder_class = builder + builder = None + elif builder is None: if isinstance(features, str): features = [features] if features is None or len(features) == 0: @@ -196,9 +249,18 @@ class BeautifulSoup(Tag): "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) - builder = builder_class() - if not (original_features == builder.NAME or - original_features in builder.ALTERNATE_NAMES): + + # At this point either we have a TreeBuilder instance in + # builder, or we have a builder_class that we can instantiate + # with the remaining **kwargs. + if builder is None: + builder = builder_class(**kwargs) + if not original_builder and not ( + original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES + ) and markup: + # The user did not tell us which TreeBuilder to use, + # and we had to guess. Issue a warning. if builder.is_xml: markup_type = "XML" else: @@ -232,13 +294,18 @@ class BeautifulSoup(Tag): parser=builder.NAME, markup_type=markup_type ) - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2) - + warnings.warn( + self.NO_PARSER_SPECIFIED_WARNING % values, + GuessedAtParserWarning, stacklevel=2 + ) + else: + if kwargs: + warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") + self.builder = builder self.is_xml = builder.is_xml self.known_xml = self.is_xml - self.builder.soup = self - + self._namespaces = dict() self.parse_only = parse_only if hasattr(markup, 'read'): # It's a file-type object. 
@@ -247,49 +314,42 @@ class BeautifulSoup(Tag): (isinstance(markup, bytes) and not b'<' in markup) or (isinstance(markup, str) and not '<' in markup) ): - # Print out warnings for a couple beginner problems + # Issue warnings for a couple beginner problems # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, - # just in case that's what the user really wants. - if (isinstance(markup, str) - and not os.path.supports_unicode_filenames): - possible_filename = markup.encode("utf8") - else: - possible_filename = markup - is_file = False - try: - is_file = os.path.exists(possible_filename) - except Exception as e: - # This is almost certainly a problem involving - # characters not valid in filenames on this - # system. Just let it go. - pass - if is_file: - if isinstance(markup, str): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a filename, not markup. You should' - ' probably open this file and pass the filehandle into' - ' Beautiful Soup.' % markup) - self._check_markup_is_url(markup) + # since that is sometimes the intended behavior. + if not self._markup_is_url(markup): + self._markup_resembles_filename(markup) + rejections = [] + success = False for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( self.builder.prepare_markup( markup, from_encoding, exclude_encodings=exclude_encodings)): self.reset() + self.builder.initialize_soup(self) try: self._feed() + success = True break - except ParserRejectedMarkup: + except ParserRejectedMarkup as e: + rejections.append(e) pass + if not success: + other_exceptions = [str(e) for e in rejections] + raise ParserRejectedMarkup( + "The markup you provided was rejected by the parser. 
Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) + ) + # Clear out the markup and remove the builder's circular # reference to this object. self.markup = None self.builder.soup = None def __copy__(self): + """Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" copy = type(self)( self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' ) @@ -304,15 +364,31 @@ class BeautifulSoup(Tag): def __getstate__(self): # Frequently a tree builder can't be pickled. d = dict(self.__dict__) - if 'builder' in d and not self.builder.picklable: + if 'builder' in d and d['builder'] is not None and not self.builder.picklable: d['builder'] = None return d + + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is bytes so it's safe to send into warnings.warn. - @staticmethod - def _check_markup_is_url(markup): - """ - Check if markup looks like it's actually a url and raise a warning - if so. Markup can be unicode or str (py2) / bytes (py3). + TODO: warnings.warn had this problem back in 2010 but it might not + anymore. + """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _markup_is_url(cls, markup): + """Error-handling method to raise a warning if incoming markup looks + like a URL. + + :param markup: A string. + :return: Whether or not the markup resembles a URL + closely enough to justify a warning. """ if isinstance(markup, bytes): space = b' ' @@ -321,22 +397,54 @@ class BeautifulSoup(Tag): space = ' ' cant_start_with = ("http:", "https:") else: - return + return False if any(markup.startswith(prefix) for prefix in cant_start_with): if not space in markup: - if isinstance(markup, bytes): - decoded_markup = markup.decode('utf-8', 'replace') - else: - decoded_markup = markup warnings.warn( - '"%s" looks like a URL. 
Beautiful Soup is not an' - ' HTTP client. You should probably use an HTTP client like' - ' requests to get the document behind the URL, and feed' - ' that document to Beautiful Soup.' % decoded_markup + 'The input looks more like a URL than markup. You may want to use' + ' an HTTP client like requests to get the document behind' + ' the URL, and feed that document to Beautiful Soup.', + MarkupResemblesLocatorWarning ) + return True + return False + @classmethod + def _markup_resembles_filename(cls, markup): + """Error-handling method to raise a warning if incoming markup + resembles a filename. + + :param markup: A bytestring or string. + :return: Whether or not the markup resembles a filename + closely enough to justify a warning. + """ + path_characters = '/\\' + extensions = ['.html', '.htm', '.xml', '.xhtml', '.txt'] + if isinstance(markup, bytes): + path_characters = path_characters.encode("utf8") + extensions = [x.encode('utf8') for x in extensions] + filelike = False + if any(x in markup for x in path_characters): + filelike = True + else: + lower = markup.lower() + if any(lower.endswith(ext) for ext in extensions): + filelike = True + if filelike: + warnings.warn( + 'The input looks more like a filename than markup. You may' + ' want to open this file and pass the filehandle into' + ' Beautiful Soup.', + MarkupResemblesLocatorWarning + ) + return True + return False + def _feed(self): + """Internal method that parses previously set markup, creating a large + number of Tag and NavigableString objects. + """ # Convert the document to Unicode. self.builder.reset() @@ -347,49 +455,110 @@ class BeautifulSoup(Tag): self.popTag() def reset(self): + """Reset this object to a state as though it had never parsed any + markup. 
+ """ Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) self.hidden = 1 self.builder.reset() self.current_data = [] self.currentTag = None self.tagStack = [] + self.open_tag_counter = Counter() self.preserve_whitespace_tag_stack = [] + self.string_container_stack = [] self.pushTag(self) - def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs): - """Create a new tag associated with this soup.""" + def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, + sourceline=None, sourcepos=None, **kwattrs): + """Create a new Tag associated with this BeautifulSoup object. + + :param name: The name of the new Tag. + :param namespace: The URI of the new Tag's XML namespace, if any. + :param prefix: The prefix for the new Tag's XML namespace, if any. + :param attrs: A dictionary of this Tag's attribute values; can + be used instead of `kwattrs` for attributes like 'class' + that are reserved words in Python. + :param sourceline: The line number where this tag was + (purportedly) found in its source document. + :param sourcepos: The character position within `sourceline` where this + tag was (purportedly) found. + :param kwattrs: Keyword arguments for the new Tag's attribute values. + + """ kwattrs.update(attrs) - return Tag(None, self.builder, name, namespace, nsprefix, kwattrs) + return self.element_classes.get(Tag, Tag)( + None, self.builder, name, namespace, nsprefix, kwattrs, + sourceline=sourceline, sourcepos=sourcepos + ) - def new_string(self, s, subclass=NavigableString): - """Create a new NavigableString associated with this soup.""" - return subclass(s) + def string_container(self, base_class=None): + container = base_class or NavigableString + + # There may be a general override of NavigableString. + container = self.element_classes.get( + container, container + ) - def insert_before(self, successor): + # On top of that, we may be inside a tag that needs a special + # container class. 
+ if self.string_container_stack and container is NavigableString: + container = self.builder.string_containers.get( + self.string_container_stack[-1].name, container + ) + return container + + def new_string(self, s, subclass=None): + """Create a new NavigableString associated with this BeautifulSoup + object. + """ + container = self.string_container(subclass) + return container(s) + + def insert_before(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_before().") - def insert_after(self, successor): + def insert_after(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): + """Internal method called by _popToTag when a tag is closed.""" tag = self.tagStack.pop() + if tag.name in self.open_tag_counter: + self.open_tag_counter[tag.name] -= 1 if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: self.preserve_whitespace_tag_stack.pop() - #print "Pop", tag.name + if self.string_container_stack and tag == self.string_container_stack[-1]: + self.string_container_stack.pop() + #print("Pop", tag.name) if self.tagStack: self.currentTag = self.tagStack[-1] return self.currentTag def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: + """Internal method called by handle_starttag when a tag is opened.""" + #print("Push", tag.name) + if self.currentTag is not None: self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] + if tag.name != self.ROOT_TAG_NAME: + self.open_tag_counter[tag.name] += 1 if tag.name in self.builder.preserve_whitespace_tags: 
self.preserve_whitespace_tag_stack.append(tag) + if tag.name in self.builder.string_containers: + self.string_container_stack.append(tag) - def endData(self, containerClass=NavigableString): + def endData(self, containerClass=None): + """Method called by the TreeBuilder when the end of a data segment + occurs. + """ if self.current_data: current_data = ''.join(self.current_data) # If whitespace is not preserved, and this string contains @@ -416,72 +585,93 @@ class BeautifulSoup(Tag): not self.parse_only.search(current_data)): return + containerClass = self.string_container(containerClass) o = containerClass(current_data) self.object_was_parsed(o) def object_was_parsed(self, o, parent=None, most_recent_element=None): - """Add an object to the parse tree.""" - parent = parent or self.currentTag - previous_element = most_recent_element or self._most_recent_element + """Method called by the TreeBuilder to integrate an object into the parse tree.""" + if parent is None: + parent = self.currentTag + if most_recent_element is not None: + previous_element = most_recent_element + else: + previous_element = self._most_recent_element next_element = previous_sibling = next_sibling = None if isinstance(o, Tag): next_element = o.next_element next_sibling = o.next_sibling previous_sibling = o.previous_sibling - if not previous_element: + if previous_element is None: previous_element = o.previous_element + fix = parent.next_element is not None + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) self._most_recent_element = o parent.contents.append(o) - if parent.next_sibling: - # This node is being inserted into an element that has - # already been parsed. Deal with any dangling references. - index = len(parent.contents)-1 - while index >= 0: - if parent.contents[index] is o: - break - index -= 1 - else: - raise ValueError( - "Error building tree: supposedly %r was inserted " - "into %r after the fact, but I don't see it!" 
% ( - o, parent - ) - ) - if index == 0: - previous_element = parent - previous_sibling = None - else: - previous_element = previous_sibling = parent.contents[index-1] - if index == len(parent.contents)-1: - next_element = parent.next_sibling - next_sibling = None - else: - next_element = next_sibling = parent.contents[index+1] + # Check if we are inserting into an already parsed node. + if fix: + self._linkage_fixer(parent) - o.previous_element = previous_element - if previous_element: - previous_element.next_element = o - o.next_element = next_element - if next_element: - next_element.previous_element = o - o.next_sibling = next_sibling - if next_sibling: - next_sibling.previous_sibling = o - o.previous_sibling = previous_sibling - if previous_sibling: - previous_sibling.next_sibling = o + def _linkage_fixer(self, el): + """Make sure linkage of this fragment is sound.""" + + first = el.contents[0] + child = el.contents[-1] + descendant = child + + if child is first and el.parent is not None: + # Parent should be linked to first child + el.next_element = child + # We are no longer linked to whatever this element is + prev_el = child.previous_element + if prev_el is not None and prev_el is not el: + prev_el.next_element = None + # First child should be linked to the parent, and no previous siblings. + child.previous_element = el + child.previous_sibling = None + + # We have no sibling as we've been appended as the last. + child.next_sibling = None + + # This index is a tag, dig deeper for a "last descendant" + if isinstance(child, Tag) and child.contents: + descendant = child._last_descendant(False) + + # As the final step, link last descendant. It should be linked + # to the parent's next sibling (if found), else walk up the chain + # and find a parent with a sibling. It should have no next sibling. 
+ descendant.next_element = None + descendant.next_sibling = None + target = el + while True: + if target is None: + break + elif target.next_sibling is not None: + descendant.next_element = target.next_sibling + target.next_sibling.previous_element = child + break + target = target.parent def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name + instance of the given tag. + + If there are no open tags with the given name, nothing will be + popped. + + :param name: Pop up to the most recent tag with this name. + :param nsprefix: The namespace prefix that goes with `name`. + :param inclusivePop: It this is false, pops the tag stack up + to but *not* including the most recent instqance of the + given tag. + + """ + #print("Popping to %s" % name) if name == self.ROOT_TAG_NAME: # The BeautifulSoup object itself can never be popped. return @@ -490,6 +680,8 @@ class BeautifulSoup(Tag): stack_size = len(self.tagStack) for i in range(stack_size - 1, 0, -1): + if not self.open_tag_counter.get(name): + break t = self.tagStack[i] if (name == t.name and nsprefix == t.prefix): if inclusivePop: @@ -499,16 +691,26 @@ class BeautifulSoup(Tag): return most_recently_popped - def handle_starttag(self, name, namespace, nsprefix, attrs): - """Push a start tag on to the stack. + def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None, + sourcepos=None, namespaces=None): + """Called by the tree builder when a new tag is encountered. - If this method returns None, the tag was rejected by the + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + :param attrs: A dictionary of attribute values. + :param sourceline: The line number where this tag was found in its + source document. 
+ :param sourcepos: The character position within `sourceline` where this + tag was found. + :param namespaces: A dictionary of all namespace prefix mappings + currently in scope in the document. + + If this method returns None, the tag was rejected by an active SoupStrainer. You should proceed as if the tag had not occurred in the document. For instance, if this was a self-closing tag, don't call handle_endtag. """ - - # print "Start tag %s: %s" % (name, attrs) + # print("Start tag %s: %s" % (name, attrs)) self.endData() if (self.parse_only and len(self.tagStack) <= 1 @@ -516,33 +718,53 @@ class BeautifulSoup(Tag): or not self.parse_only.search_tag(name, attrs))): return None - tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self._most_recent_element) + tag = self.element_classes.get(Tag, Tag)( + self, self.builder, name, namespace, nsprefix, attrs, + self.currentTag, self._most_recent_element, + sourceline=sourceline, sourcepos=sourcepos, + namespaces=namespaces + ) if tag is None: return tag - if self._most_recent_element: + if self._most_recent_element is not None: self._most_recent_element.next_element = tag self._most_recent_element = tag self.pushTag(tag) return tag def handle_endtag(self, name, nsprefix=None): - #print "End tag: " + name + """Called by the tree builder when an ending tag is encountered. + + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + """ + #print("End tag: " + name) self.endData() self._popToTag(name, nsprefix) - + def handle_data(self, data): + """Called by the tree builder when a chunk of textual data is encountered.""" self.current_data.append(data) - + def decode(self, pretty_print=False, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): - """Returns a string or Unicode representation of this document. - To get Unicode, pass None for encoding.""" + """Returns a string or Unicode representation of the parse tree + as an HTML or XML document. 
+ :param pretty_print: If this is True, indentation will be used to + make the document more readable. + :param eventual_encoding: The encoding of the final document. + If this is None, the document will be a Unicode string. + """ if self.is_xml: # Print the XML declaration encoding_part = '' + if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS: + # This is a special Python encoding; it can't actually + # go into an XML document because it means nothing + # outside of Python. + eventual_encoding = None if eventual_encoding != None: encoding_part = ' encoding="%s"' % eventual_encoding prefix = '\n' % encoding_part @@ -555,7 +777,7 @@ class BeautifulSoup(Tag): return prefix + super(BeautifulSoup, self).decode( indent_level, eventual_encoding, formatter) -# Alias to make it easier to type import: 'from bs4 import _soup' +# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' _s = BeautifulSoup _soup = BeautifulSoup @@ -566,19 +788,25 @@ class BeautifulStoneSoup(BeautifulSoup): kwargs['features'] = 'xml' warnings.warn( 'The BeautifulStoneSoup class is deprecated. Instead of using ' - 'it, pass features="xml" into the BeautifulSoup constructor.') + 'it, pass features="xml" into the BeautifulSoup constructor.', + DeprecationWarning + ) super(BeautifulStoneSoup, self).__init__(*args, **kwargs) class StopParsing(Exception): + """Exception raised by a TreeBuilder if it's unable to continue parsing.""" pass class FeatureNotFound(ValueError): + """Exception raised by the BeautifulSoup constructor if no parser with the + requested features is found. + """ pass -#By default, act as an HTML pretty-printer. +#If this file is run as a script, act as an HTML pretty-printer. 
if __name__ == '__main__': import sys soup = BeautifulSoup(sys.stdin) - print(soup.prettify()) + print((soup.prettify())) diff --git a/libs/common/bs4/builder/__init__.py b/libs/common/bs4/builder/__init__.py index b80ad684..9f789f3e 100644 --- a/libs/common/bs4/builder/__init__.py +++ b/libs/common/bs4/builder/__init__.py @@ -1,15 +1,21 @@ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. +# Use of this source code is governed by the MIT license. +__license__ = "MIT" from collections import defaultdict import itertools +import re +import warnings import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, - HTMLAwareEntitySubstitution, - whitespace_re - ) + RubyParenthesisString, + RubyTextString, + Stylesheet, + Script, + TemplateString, + nonwhitespace_re +) __all__ = [ 'HTMLTreeBuilder', @@ -26,20 +32,41 @@ XML = 'xml' HTML = 'html' HTML_5 = 'html5' +class XMLParsedAsHTMLWarning(UserWarning): + """The warning issued when an HTML parser is used to parse + XML that is not XHTML. + """ + MESSAGE = """It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.""" + class TreeBuilderRegistry(object): - + """A way of looking up TreeBuilder subclasses by their name or by desired + features. + """ + def __init__(self): self.builders_for_feature = defaultdict(list) self.builders = [] def register(self, treebuilder_class): - """Register a treebuilder based on its advertised features.""" + """Register a treebuilder based on its advertised features. + + :param treebuilder_class: A subclass of Treebuilder. its .features + attribute should list its features. 
+ """ for feature in treebuilder_class.features: self.builders_for_feature[feature].insert(0, treebuilder_class) self.builders.insert(0, treebuilder_class) def lookup(self, *features): + """Look up a TreeBuilder subclass with the desired features. + + :param features: A list of features to look for. If none are + provided, the most recently registered TreeBuilder subclass + will be used. + :return: A TreeBuilder subclass, or None if there's no + registered subclass with all the requested features. + """ if len(self.builders) == 0: # There are no builders at all. return None @@ -82,7 +109,7 @@ class TreeBuilderRegistry(object): builder_registry = TreeBuilderRegistry() class TreeBuilder(object): - """Turn a document into a Beautiful Soup object tree.""" + """Turn a textual document into a Beautiful Soup object tree.""" NAME = "[Unknown tree builder]" ALTERNATE_NAMES = [] @@ -90,19 +117,89 @@ class TreeBuilder(object): is_xml = False picklable = False - preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. # A value for these tag/attribute combinations is a space- or # comma-separated list of CDATA, rather than a single CDATA. - cdata_list_attributes = {} + DEFAULT_CDATA_LIST_ATTRIBUTES = {} + # Whitespace should be preserved inside these tags. + DEFAULT_PRESERVE_WHITESPACE_TAGS = set() - def __init__(self): + # The textual contents of tags with these names should be + # instantiated with some class other than NavigableString. + DEFAULT_STRING_CONTAINERS = {} + + USE_DEFAULT = object() + + # Most parsers don't keep track of line numbers. + TRACKS_LINE_NUMBERS = False + + def __init__(self, multi_valued_attributes=USE_DEFAULT, + preserve_whitespace_tags=USE_DEFAULT, + store_line_numbers=USE_DEFAULT, + string_containers=USE_DEFAULT, + ): + """Constructor. 
+ + :param multi_valued_attributes: If this is set to None, the + TreeBuilder will not turn any values for attributes like + 'class' into lists. Setting this to a dictionary will + customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES + for an example. + + Internally, these are called "CDATA list attributes", but that + probably doesn't make sense to an end-user, so the argument name + is `multi_valued_attributes`. + + :param preserve_whitespace_tags: A list of tags to treat + the way
 tags are treated in HTML. Tags in this list
+         are immune from pretty-printing; their contents will always be
+         output as-is.
+
+        :param string_containers: A dictionary mapping tag names to
+        the classes that should be instantiated to contain the textual
+        contents of those tags. The default is to use NavigableString
+        for every tag, no matter what the name. You can override the
+        default by changing DEFAULT_STRING_CONTAINERS.
+
+        :param store_line_numbers: If the parser keeps track of the
+         line numbers and positions of the original markup, that
+         information will, by default, be stored in each corresponding
+         `Tag` object. You can turn this off by passing
+         store_line_numbers=False. If the parser you're using doesn't 
+         keep track of this information, then setting store_line_numbers=True
+         will do nothing.
+        """
         self.soup = None
+        if multi_valued_attributes is self.USE_DEFAULT:  # None is a real value here, hence the sentinel
+            multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
+        self.cdata_list_attributes = multi_valued_attributes
+        if preserve_whitespace_tags is self.USE_DEFAULT:
+            preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
+        self.preserve_whitespace_tags = preserve_whitespace_tags
+        if store_line_numbers is self.USE_DEFAULT:  # identity, not ==: never invoke the argument's __eq__
+            store_line_numbers = self.TRACKS_LINE_NUMBERS
+        self.store_line_numbers = store_line_numbers
+        if string_containers is self.USE_DEFAULT:  # identity check, consistent with the tests above
+            string_containers = self.DEFAULT_STRING_CONTAINERS
+        self.string_containers = string_containers
+        
+    def initialize_soup(self, soup):
+        """Associate this TreeBuilder with a BeautifulSoup object that
+        has just been initialized, storing it in `self.soup`.
 
+        :param soup: A BeautifulSoup object.
+        """
+        self.soup = soup
+        
     def reset(self):
+        """Do any work necessary to reset the underlying parser
+        for a new document.
+
+        The base implementation here is a no-op (`pass`).
+        """
         pass
 
     def can_be_empty_element(self, tag_name):
@@ -114,24 +211,58 @@ class TreeBuilder(object):
         For instance: an HTMLBuilder does not consider a 

tag to be an empty-element tag (it's not in HTMLBuilder.empty_element_tags). This means an empty

tag - will be presented as "

", not "

". + will be presented as "

", not "

" or "

". The default implementation has no opinion about which tags are empty-element tags, so a tag will be presented as an - empty-element tag if and only if it has no contents. - "" will become "", and "bar" will + empty-element tag if and only if it has no children. + "" will become "", and "bar" will be left alone. + + :param tag_name: The name of a markup tag. """ if self.empty_element_tags is None: return True return tag_name in self.empty_element_tags - + def feed(self, markup): + """Run some incoming markup through some parsing process, + populating the `BeautifulSoup` object in self.soup. + + This method is not implemented in TreeBuilder; it must be + implemented in subclasses. + + :return: None. + """ raise NotImplementedError() def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): - return markup, None, None, False + document_declared_encoding=None, exclude_encodings=None): + """Run any preliminary steps necessary to make incoming markup + acceptable to the parser. + + :param markup: Some markup -- probably a bytestring. + :param user_specified_encoding: The user asked to try this encoding. + :param document_declared_encoding: The markup itself claims to be + in this encoding. NOTE: This argument is not used by the + calling code and can probably be removed. + :param exclude_encodings: The user asked _not_ to try any of + these encodings. + + :yield: A series of 4-tuples: + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for converting the + document to Unicode and parsing it. Each strategy will be tried + in turn. + + By default, the only strategy is to parse the markup + as-is. See `LXMLTreeBuilderForXML` and + `HTMLParserTreeBuilder` for implementations that take into + account the quirks of particular parsers. 
+ """ + yield markup, None, None, False def test_fragment_to_document(self, fragment): """Wrap an HTML fragment to make it look like a document. @@ -143,16 +274,36 @@ class TreeBuilder(object): results against other HTML fragments. This method should not be used outside of tests. + + :param fragment: A string -- fragment of HTML. + :return: A string -- a full HTML document. """ return fragment def set_up_substitutions(self, tag): + """Set up any substitutions that will need to be performed on + a `Tag` when it's output as a string. + + By default, this does nothing. See `HTMLTreeBuilder` for a + case where this is used. + + :param tag: A `Tag` + :return: Whether or not a substitution was performed. + """ return False def _replace_cdata_list_attribute_values(self, tag_name, attrs): - """Replaces class="foo bar" with class=["foo", "bar"] + """When an attribute value is associated with a tag that can + have multiple values for that attribute, convert the string + value to a list of strings. - Modifies its input in place. + Basically, replaces class="foo bar" with class=["foo", "bar"] + + NOTE: This method modifies its input in place. + + :param tag_name: The name of a tag. + :param attrs: A dictionary containing the tag's attributes. + Any appropriate attribute values will be modified in place. """ if not attrs: return attrs @@ -167,7 +318,7 @@ class TreeBuilder(object): # values. Split it into a list. value = attrs[attr] if isinstance(value, str): - values = whitespace_re.split(value) + values = nonwhitespace_re.findall(value) else: # html5lib sometimes calls setAttributes twice # for the same tag when rearranging the parse @@ -178,9 +329,13 @@ class TreeBuilder(object): values = value attrs[attr] = values return attrs - + class SAXTreeBuilder(TreeBuilder): - """A Beautiful Soup treebuilder that listens for SAX events.""" + """A Beautiful Soup treebuilder that listens for SAX events. 
+ + This is not currently used for anything, but it demonstrates + how a simple TreeBuilder would work. + """ def feed(self, markup): raise NotImplementedError() @@ -190,11 +345,11 @@ class SAXTreeBuilder(TreeBuilder): def startElement(self, name, attrs): attrs = dict((key[1], value) for key, value in list(attrs.items())) - #print "Start %s, %r" % (name, attrs) + #print("Start %s, %r" % (name, attrs)) self.soup.handle_starttag(name, attrs) def endElement(self, name): - #print "End %s" % name + #print("End %s" % name) self.soup.handle_endtag(name) def startElementNS(self, nsTuple, nodeName, attrs): @@ -231,7 +386,6 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. """ - preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags empty_element_tags = set([ # These are from HTML5. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', @@ -245,6 +399,30 @@ class HTMLTreeBuilder(TreeBuilder): # but it may do so eventually, and this information is available if # you need to use it. block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) + + # These HTML tags need special treatment so they can be + # represented by a string class other than NavigableString. + # + # For some of these tags, it's because the HTML standard defines + # an unusual content model for them. I made this list by going + # through the HTML spec + # (https://html.spec.whatwg.org/#metadata-content) and looking for + # "metadata content" elements that can contain strings. 
+ # + # The Ruby tags ( and ) are here despite being normal + # "phrasing content" tags, because the content they contain is + # qualitatively different from other text in the document, and it + # can be useful to be able to distinguish it. + # + # TODO: Arguably

foobaz

" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - # The comment is properly integrated into the tree. - foo = soup.find(text="foo") - self.assertEqual(comment, foo.next_element) - baz = soup.find(text="baz") - self.assertEqual(comment, baz.previous_element) - - def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in
 and "
-        self.assertSoupEquals(pre_markup)
-        self.assertSoupEquals(textarea_markup)
-
-        soup = self.soup(pre_markup)
-        self.assertEqual(soup.pre.prettify(), pre_markup)
-
-        soup = self.soup(textarea_markup)
-        self.assertEqual(soup.textarea.prettify(), textarea_markup)
-
-        soup = self.soup("")
-        self.assertEqual(soup.textarea.prettify(), "")
-
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "Inside a B tag"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) - - double_nested_b_tag = "

A doubly nested tag

" - self.assertSoupEquals(nested_b_tag) - - def test_nested_block_level_elements(self): - """Block elements can be nested.""" - soup = self.soup('

Foo

') - blockquote = soup.blockquote - self.assertEqual(blockquote.p.b.string, 'Foo') - self.assertEqual(blockquote.b.string, 'Foo') - - def test_correctly_nested_tables(self): - """One table can go inside another one.""" - markup = ('' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") - - def test_deeply_nested_multivalued_attribute(self): - # html5lib can set the attributes of the same tag many times - # as it rearranges the tree. This has caused problems with - # multivalued attributes. - markup = '
' - soup = self.soup(markup) - self.assertEqual(["css"], soup.div.div['class']) - - def test_multivalued_attribute_on_html(self): - # html5lib uses a different API to set the attributes ot the - # tag. This has caused problems with multivalued - # attributes. - markup = '' - soup = self.soup(markup) - self.assertEqual(["a", "b"], soup.html['class']) - - def test_angle_brackets_in_attribute_values_are_escaped(self): - self.assertSoupEquals('', '') - - def test_strings_resembling_character_entity_references(self): - # "&T" and "&p" look like incomplete character entities, but they are - # not. - self.assertSoupEquals( - "

• AT&T is in the s&p 500

", - "

\u2022 AT&T is in the s&p 500

" - ) - - def test_entities_in_foreign_document_encoding(self): - # “ and ” are invalid numeric entities referencing - # Windows-1252 characters. - references a character common - # to Windows-1252 and Unicode, and ☃ references a - # character only found in Unicode. - # - # All of these entities should be converted to Unicode - # characters. - markup = "

“Hello” -☃

" - soup = self.soup(markup) - self.assertEqual("“Hello” -☃", soup.p.string) - - def test_entities_in_attributes_converted_to_unicode(self): - expect = '

' - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - - def test_entities_in_text_converted_to_unicode(self): - expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - - def test_quot_entity_converted_to_quotation_mark(self): - self.assertSoupEquals("

I said "good day!"

", - '

I said "good day!"

') - - def test_out_of_range_entity(self): - expect = "\N{REPLACEMENT CHARACTER}" - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - - def test_multipart_strings(self): - "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." - soup = self.soup("

\nfoo

") - self.assertEqual("p", soup.h2.string.next_element.name) - self.assertEqual("p", soup.p.name) - self.assertConnectedness(soup) - - def test_empty_element_tags(self): - """Verify consistent handling of empty-element tags, - no matter how they come in through the markup. - """ - self.assertSoupEquals('


', "


") - self.assertSoupEquals('


', "


") - - def test_head_tag_between_head_and_body(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - foo - -""" - soup = self.soup(content) - self.assertNotEqual(None, soup.html.body) - self.assertConnectedness(soup) - - def test_multiple_copies_of_a_tag(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - - - - -""" - soup = self.soup(content) - self.assertConnectedness(soup.article) - - def test_basic_namespaces(self): - """Parsers don't need to *understand* namespaces, but at the - very least they should not choke on namespaces or lose - data.""" - - markup = b'4' - soup = self.soup(markup) - self.assertEqual(markup, soup.encode()) - html = soup.html - self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) - self.assertEqual( - 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) - self.assertEqual( - 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) - - def test_multivalued_attribute_value_becomes_list(self): - markup = b'' - soup = self.soup(markup) - self.assertEqual(['foo', 'bar'], soup.a['class']) - - # - # Generally speaking, tests below this point are more tests of - # Beautiful Soup than tests of the tree builders. But parsers are - # weird, so we run these tests separately for every tree builder - # to detect any differences between them. - # - - def test_can_parse_unicode_document(self): - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! - markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' 
- soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" - strainer = SoupStrainer("b") - soup = self.soup("A bold statement", - parse_only=strainer) - self.assertEqual(soup.decode(), "bold") - - def test_single_quote_attribute_values_become_double_quotes(self): - self.assertSoupEquals("", - '') - - def test_attribute_values_with_nested_quotes_are_left_alone(self): - text = """a""" - self.assertSoupEquals(text) - - def test_attribute_values_with_double_nested_quotes_get_quoted(self): - text = """a""" - soup = self.soup(text) - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - self.assertSoupEquals( - soup.foo.decode(), - """a""") - - def test_ampersand_in_attribute_value_gets_escaped(self): - self.assertSoupEquals('', - '') - - self.assertSoupEquals( - 'foo', - 'foo') - - def test_escaped_ampersand_in_attribute_value_is_left_alone(self): - self.assertSoupEquals('') - - def test_entities_in_strings_converted_during_parsing(self): - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "

<<sacré bleu!>>

" - expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): - # Microsoft smart quotes are converted to Unicode characters during - # parsing. - quote = b"

\x91Foo\x92

" - soup = self.soup(quote) - self.assertEqual( - soup.p.string, - "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup("  ") - self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "

<<sacré bleu!>>

" - expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - - def test_real_iso_latin_document(self): - # Smoke test of interrelated functionality, using an - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. - iso_latin_html = unicode_html.encode("iso-8859-1") - - # Parse the ISO-Latin-1 HTML. - soup = self.soup(iso_latin_html) - # Encode it to UTF-8. - result = soup.encode("utf-8") - - # What do we expect the result to look like? Well, it would - # look like unicode_html, except that the META tag would say - # UTF-8 instead of ISO-Latin-1. - expected = unicode_html.replace("ISO-Latin-1", "utf-8") - - # And, of course, it would be in UTF-8, not Unicode. - expected = expected.encode("utf-8") - - # Ta-da! - self.assertEqual(result, expected) - - def test_real_shift_jis_document(self): - # Smoke test to make sure the parser can handle a document in - # Shift-JIS encoding, without choking. - shift_jis_html = ( - b'
'
-            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
-            b'
') - unicode_html = shift_jis_html.decode("shift-jis") - soup = self.soup(unicode_html) - - # Make sure the parse tree is correctly encoded to various - # encodings. - self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) - self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) - - def test_real_hebrew_document(self): - # A real-world test to make sure we can convert ISO-8859-9 (a - # Hebrew encoding) to UTF-8. - hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' - soup = self.soup( - hebrew_document, from_encoding="iso8859-8") - # Some tree builders call it iso8859-8, others call it iso-8859-9. - # That's not a difference we really care about. - assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') - self.assertEqual( - soup.encode('utf-8'), - hebrew_document.decode("iso8859-8").encode("utf-8")) - - def test_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) - content = parsed_meta['content'] - self.assertEqual('text/html; charset=x-sjis', content) - - # But that value is actually a ContentMetaAttributeValue object. - self.assertTrue(isinstance(content, ContentMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('text/html; charset=utf8', content.encode("utf8")) - - # For the rest of the story, see TestSubstitutions in - # test_tree.py. - - def test_html5_style_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', id="encoding") - charset = parsed_meta['charset'] - self.assertEqual('x-sjis', charset) - - # But that value is actually a CharsetMetaAttributeValue object. - self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. 
- self.assertEqual('utf8', charset.encode("utf8")) - - def test_tag_with_no_attributes_can_have_attributes_added(self): - data = self.soup("text") - data.a['foo'] = 'bar' - self.assertEqual('text', data.a.decode()) - -class XMLTreeBuilderSmokeTest(object): - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - tree = self.soup("foo") - dumped = pickle.dumps(tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), tree.decode()) - - def test_docstring_generated(self): - soup = self.soup("") - self.assertEqual( - soup.encode(), b'\n') - - def test_xml_declaration(self): - markup = b"""\n""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_processing_instruction(self): - markup = b"""\n""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_real_xhtml_document(self): - """A real XHTML document should come out *exactly* the same as it went in.""" - markup = b""" - - -Hello. -Goodbye. -""" - soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - - def test_nested_namespaces(self): - doc = b""" - - - - - -""" - soup = self.soup(doc) - self.assertEqual(doc, soup.encode()) - - def test_formatter_processes_script_tag_for_xml_documents(self): - doc = """ - -""" - soup = BeautifulSoup(doc, "lxml-xml") - # lxml would have stripped this while parsing, but we can add - # it later. - soup.script.string = 'console.log("< < hey > > ");' - encoded = soup.encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): - markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' 
- soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.root.string) - - def test_popping_namespaced_tag(self): - markup = 'b2012-07-02T20:33:42Zcd' - soup = self.soup(markup) - self.assertEqual( - str(soup.rss), markup) - - def test_docstring_includes_correct_encoding(self): - soup = self.soup("") - self.assertEqual( - soup.encode("latin1"), - b'\n') - - def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" - markup = (b'\n' - + b'0' * (2**12) - + b'') - soup = self.soup(markup) - self.assertEqual(soup.encode("utf-8"), markup) - - - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): - self.assertSoupEquals("

", "

") - self.assertSoupEquals("

foo

") - - def test_namespaces_are_preserved(self): - markup = 'This tag is in the a namespaceThis tag is in the b namespace' - soup = self.soup(markup) - root = soup.root - self.assertEqual("http://example.com/", root['xmlns:a']) - self.assertEqual("http://example.net/", root['xmlns:b']) - - def test_closing_namespaced_tag(self): - markup = '

20010504

' - soup = self.soup(markup) - self.assertEqual(str(soup.p), markup) - - def test_namespaced_attributes(self): - markup = '' - soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) - - def test_namespaced_attributes_xml_namespace(self): - markup = 'bar' - soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) - - def test_find_by_prefixed_name(self): - doc = """ -foo - bar - baz - -""" - soup = self.soup(doc) - - # There are three tags. - self.assertEqual(3, len(soup.find_all('tag'))) - - # But two of them are ns1:tag and one of them is ns2:tag. - self.assertEqual(2, len(soup.find_all('ns1:tag'))) - self.assertEqual(1, len(soup.find_all('ns2:tag'))) - - self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) - self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) - - def test_copy_tag_preserves_namespace(self): - xml = """ -""" - - soup = self.soup(xml) - tag = soup.document - duplicate = copy.copy(tag) - - # The two tags have the same namespace prefix. - self.assertEqual(tag.prefix, duplicate.prefix) - - -class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): - """Smoke test for a tree builder that supports HTML5.""" - - def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. 
- pass - - def test_html_tags_have_namespace(self): - markup = "" - soup = self.soup(markup) - self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) - - def test_svg_tags_have_namespace(self): - markup = '' - soup = self.soup(markup) - namespace = "http://www.w3.org/2000/svg" - self.assertEqual(namespace, soup.svg.namespace) - self.assertEqual(namespace, soup.circle.namespace) - - - def test_mathml_tags_have_namespace(self): - markup = '5' - soup = self.soup(markup) - namespace = 'http://www.w3.org/1998/Math/MathML' - self.assertEqual(namespace, soup.math.namespace) - self.assertEqual(namespace, soup.msqrt.namespace) - - def test_xml_declaration_becomes_comment(self): - markup = '' - soup = self.soup(markup) - self.assertTrue(isinstance(soup.contents[0], Comment)) - self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') - self.assertEqual("html", soup.contents[0].next_element.name) - -def skipIf(condition, reason): - def nothing(test, *args, **kwargs): - return None - - def decorator(test_item): - if condition: - return nothing - else: - return test_item - - return decorator diff --git a/libs/common/bs4/tests/__init__.py b/libs/common/bs4/tests/__init__.py index 142c8cc3..4af4b0ce 100644 --- a/libs/common/bs4/tests/__init__.py +++ b/libs/common/bs4/tests/__init__.py @@ -1 +1,1191 @@ -"The beautifulsoup tests." +# encoding: utf-8 +"""Helper classes for tests.""" + +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +import pickle +import copy +import functools +import warnings +import pytest +from bs4 import BeautifulSoup +from bs4.element import ( + CharsetMetaAttributeValue, + Comment, + ContentMetaAttributeValue, + Doctype, + PYTHON_SPECIFIC_ENCODINGS, + SoupStrainer, + Script, + Stylesheet, + Tag +) + +from bs4.builder import ( + DetectsXMLParsedAsHTML, + HTMLParserTreeBuilder, + XMLParsedAsHTMLWarning, +) +default_builder = HTMLParserTreeBuilder + +BAD_DOCUMENT = """A bare string + + +
+
HTML5 does allow CDATA sections in SVG
+
A tag
+
A
tag that supposedly has contents.
+
AT&T
+
+
+
This numeric entity is missing the final semicolon:
+
+
a
+
This document contains (do you see it?)
+
This document ends with That attribute value was bogus
+The doctype is invalid because it contains extra whitespace +
That boolean attribute had no value
+
Here's a nonexistent entity: &#foo; (do you see it?)
+
This document ends before the entity finishes: > +

Paragraphs shouldn't contain block display elements, but this one does:

you see?

+Multiple values for the same attribute. +
Here's a table
+
+
This tag contains nothing but whitespace:
+

This p tag is cut off by

the end of the blockquote tag
+
Here's a nested table:
foo
This table contains bare markup
+ +
This document contains a surprise doctype
+ +
Tag name contains Unicode characters
+ + +""" + + +class SoupTest(object): + + @property + def default_builder(self): + return default_builder + + def soup(self, markup, **kwargs): + """Build a Beautiful Soup object from markup.""" + builder = kwargs.pop('builder', self.default_builder) + return BeautifulSoup(markup, builder=builder, **kwargs) + + def document_for(self, markup, **kwargs): + """Turn an HTML fragment into a document. + + The details depend on the builder. + """ + return self.default_builder(**kwargs).test_fragment_to_document(markup) + + def assert_soup(self, to_parse, compare_parsed_to=None): + """Parse some markup using Beautiful Soup and verify that + the output markup is as expected. + """ + builder = self.default_builder + obj = BeautifulSoup(to_parse, builder=builder) + if compare_parsed_to is None: + compare_parsed_to = to_parse + + # Verify that the documents come out the same. + assert obj.decode() == self.document_for(compare_parsed_to) + + # Also run some checks on the BeautifulSoup object itself: + + # Verify that every tag that was opened was eventually closed. + + # There are no tags in the open tag counter. + assert all(v==0 for v in list(obj.open_tag_counter.values())) + + # The only tag in the tag stack is the one for the root + # document. + assert [obj.ROOT_TAG_NAME] == [x.name for x in obj.tagStack] + + assertSoupEquals = assert_soup + + def assertConnectedness(self, element): + """Ensure that next_element and previous_element are properly + set for all descendants of the given element. + """ + earlier = None + for e in element.descendants: + if earlier: + assert e == earlier.next_element + assert earlier == e.previous_element + earlier = e + + def linkage_validator(self, el, _recursive_call=False): + """Ensure proper linkage throughout the document.""" + descendant = None + # Document element should have no previous element or previous sibling. + # It also shouldn't have a next sibling. 
+ if el.parent is None: + assert el.previous_element is None,\ + "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + el, el.previous_element, None + ) + assert el.previous_sibling is None,\ + "Bad previous_sibling\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + el, el.previous_sibling, None + ) + assert el.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( + el, el.next_sibling, None + ) + + idx = 0 + child = None + last_child = None + last_idx = len(el.contents) - 1 + for child in el.contents: + descendant = None + + # Parent should link next element to their first child + # That child should have no previous sibling + if idx == 0: + if el.parent is not None: + assert el.next_element is child,\ + "Bad next_element\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( + el, el.next_element, child + ) + assert child.previous_element is el,\ + "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + child, child.previous_element, el + ) + assert child.previous_sibling is None,\ + "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED: {}".format( + child, child.previous_sibling, None + ) + + # If not the first child, previous index should link as sibling to this index + # Previous element should match the last index or the last bubbled up descendant + else: + assert child.previous_sibling is el.contents[idx - 1],\ + "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED {}".format( + child, child.previous_sibling, el.contents[idx - 1] + ) + assert el.contents[idx - 1].next_sibling is child,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + el.contents[idx - 1], el.contents[idx - 1].next_sibling, child + ) + + if last_child is not None: + assert child.previous_element is last_child,\ + "Bad previous_element\nNODE: {}\nPREV {}\nEXPECTED {}\nCONTENTS {}".format( + child, child.previous_element, last_child, child.parent.contents + ) + assert last_child.next_element is child,\ + "Bad next_element\nNODE: {}\nNEXT 
{}\nEXPECTED {}".format( + last_child, last_child.next_element, child + ) + + if isinstance(child, Tag) and child.contents: + descendant = self.linkage_validator(child, True) + # A bubbled up descendant should have no next siblings + assert descendant.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + descendant, descendant.next_sibling, None + ) + + # Mark last child as either the bubbled up descendant or the current child + if descendant is not None: + last_child = descendant + else: + last_child = child + + # If last child, there are non next siblings + if idx == last_idx: + assert child.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_sibling, None + ) + idx += 1 + + child = descendant if descendant is not None else child + if child is None: + child = el + + if not _recursive_call and child is not None: + target = el + while True: + if target is None: + assert child.next_element is None, \ + "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_element, None + ) + break + elif target.next_sibling is not None: + assert child.next_element is target.next_sibling, \ + "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_element, target.next_sibling + ) + break + target = target.parent + + # We are done, so nothing to return + return None + else: + # Return the child to the recursive caller + return child + + def assert_selects(self, tags, should_match): + """Make sure that the given tags have the correct text. + + This is used in tests that define a bunch of tags, each + containing a single string, and then select certain strings by + some mechanism. + """ + assert [tag.string for tag in tags] == should_match + + def assert_selects_ids(self, tags, should_match): + """Make sure that the given tags have the correct IDs. 
+ + This is used in tests that define a bunch of tags, each + containing a single string, and then select certain strings by + some mechanism. + """ + assert [tag['id'] for tag in tags] == should_match + + +class TreeBuilderSmokeTest(object): + # Tests that are common to HTML and XML tree builders. + + @pytest.mark.parametrize( + "multi_valued_attributes", + [None, dict(b=['class']), {'*': ['notclass']}] + ) + def test_attribute_not_multi_valued(self, multi_valued_attributes): + markup = '' + soup = self.soup(markup, multi_valued_attributes=multi_valued_attributes) + assert soup.a['class'] == 'a b c' + + @pytest.mark.parametrize( + "multi_valued_attributes", [dict(a=['class']), {'*': ['class']}] + ) + def test_attribute_multi_valued(self, multi_valued_attributes): + markup = '' + soup = self.soup( + markup, multi_valued_attributes=multi_valued_attributes + ) + assert soup.a['class'] == ['a', 'b', 'c'] + + def test_fuzzed_input(self): + # This test centralizes in one place the various fuzz tests + # for Beautiful Soup created by the oss-fuzz project. + + # These strings superficially resemble markup, but they + # generally can't be parsed into anything. The best we can + # hope for is that parsing these strings won't crash the + # parser. + # + # n.b. This markup is commented out because these fuzz tests + # _do_ crash the parser. However the crashes are due to bugs + # in html.parser, not Beautiful Soup -- otherwise I'd fix the + # bugs! 
+ + bad_markup = [ + # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873 + # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700 + # https://bugs.python.org/issue37747 + # + #b'\nSome CSS" + ) + assert isinstance(soup.style.string, Stylesheet) + assert isinstance(soup.script.string, Script) + + soup = self.soup( + "" + ) + assert isinstance(soup.style.string, Stylesheet) + # The contents of the style tag resemble an HTML comment, but + # it's not treated as a comment. + assert soup.style.string == "" + assert isinstance(soup.style.string, Stylesheet) + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + assert loaded.__class__ == BeautifulSoup + assert loaded.decode() == tree.decode() + + def assertDoctypeHandled(self, doctype_fragment): + """Assert that a given doctype string is handled correctly.""" + doctype_str, soup = self._document_with_doctype(doctype_fragment) + + # Make sure a Doctype object was created. + doctype = soup.contents[0] + assert doctype.__class__ == Doctype + assert doctype == doctype_fragment + assert soup.encode("utf8")[:len(doctype_str)] == doctype_str + + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. + assert soup.p.contents[0] == 'foo' + + def _document_with_doctype(self, doctype_fragment, doctype_string="DOCTYPE"): + """Generate and parse a document with the given doctype.""" + doctype = '' % (doctype_string, doctype_fragment) + markup = doctype + '\n

foo

' + soup = self.soup(markup) + return doctype.encode("utf8"), soup + + def test_normal_doctypes(self): + """Make sure normal, everyday HTML doctypes are handled correctly.""" + self.assertDoctypeHandled("html") + self.assertDoctypeHandled( + 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') + + def test_empty_doctype(self): + soup = self.soup("") + doctype = soup.contents[0] + assert "" == doctype.strip() + + def test_mixed_case_doctype(self): + # A lowercase or mixed-case doctype becomes a Doctype. + for doctype_fragment in ("doctype", "DocType"): + doctype_str, soup = self._document_with_doctype( + "html", doctype_fragment + ) + + # Make sure a Doctype object was created and that the DOCTYPE + # is uppercase. + doctype = soup.contents[0] + assert doctype.__class__ == Doctype + assert doctype == "html" + assert soup.encode("utf8")[:len(doctype_str)] == b"" + + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. + assert soup.p.contents[0] == 'foo' + + def test_public_doctype_with_url(self): + doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"' + self.assertDoctypeHandled(doctype) + + def test_system_doctype(self): + self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"') + + def test_namespaced_system_doctype(self): + # We can handle a namespaced doctype with a system ID. + self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"') + + def test_namespaced_public_doctype(self): + # Test a namespaced doctype with a public id. + self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"') + + def test_real_xhtml_document(self): + """A real XHTML document should come out more or less the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. 
+""" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup) + assert soup.encode("utf-8").replace(b"\n", b"") == markup.replace(b"\n", b"") + + # No warning was issued about parsing an XML document as HTML, + # because XHTML is both. + assert w == [] + + + def test_namespaced_html(self): + # When a namespaced XML document is parsed as HTML it should + # be treated as HTML with weird tag names. + markup = b"""content""" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup) + + assert 2 == len(soup.find_all("ns1:foo")) + + # n.b. no "you're parsing XML as HTML" warning was given + # because there was no XML declaration. + assert [] == w + + def test_detect_xml_parsed_as_html(self): + # A warning is issued when parsing an XML document as HTML, + # but basic stuff should still work. + markup = b"""string""" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup) + assert soup.tag.string == 'string' + [warning] = w + assert isinstance(warning.message, XMLParsedAsHTMLWarning) + assert str(warning.message) == XMLParsedAsHTMLWarning.MESSAGE + + # NOTE: the warning is not issued if the document appears to + # be XHTML (tested with test_real_xhtml_document in the + # superclass) or if there is no XML declaration (tested with + # test_namespaced_html in the superclass). + + def test_processing_instruction(self): + # We test both Unicode and bytestring to verify that + # process_markup correctly sets processing_instruction_class + # even when the markup is already Unicode and there is no + # need to process anything. + markup = """""" + soup = self.soup(markup) + assert markup == soup.decode() + + markup = b"""""" + soup = self.soup(markup) + assert markup == soup.encode("utf8") + + def test_deepcopy(self): + """Make sure you can copy the tree builder. + + This is important because the builder is part of a + BeautifulSoup object, and we want to be able to copy that. 
+ """ + copy.deepcopy(self.default_builder) + + def test_p_tag_is_never_empty_element(self): + """A

tag is never designated as an empty-element tag. + + Even if the markup shows it as an empty-element tag, it + shouldn't be presented that way. + """ + soup = self.soup("

") + assert not soup.p.is_empty_element + assert str(soup.p) == "

" + + def test_unclosed_tags_get_closed(self): + """A tag that's not closed by the end of the document should be closed. + + This applies to all tags except empty-element tags. + """ + self.assert_soup("

", "

") + self.assert_soup("", "") + + self.assert_soup("
", "
") + + def test_br_is_always_empty_element_tag(self): + """A
tag is designated as an empty-element tag. + + Some parsers treat

as one
tag, some parsers as + two tags, but it should always be an empty-element tag. + """ + soup = self.soup("

") + assert soup.br.is_empty_element + assert str(soup.br) == "
" + + def test_nested_formatting_elements(self): + self.assert_soup("") + + def test_double_head(self): + html = ''' + + +Ordinary HEAD element test + + + +Hello, world! + + +''' + soup = self.soup(html) + assert "text/javascript" == soup.find('script')['type'] + + def test_comment(self): + # Comments are represented as Comment objects. + markup = "

foobaz

" + self.assert_soup(markup) + + soup = self.soup(markup) + comment = soup.find(string="foobar") + assert comment.__class__ == Comment + + # The comment is properly integrated into the tree. + foo = soup.find(string="foo") + assert comment == foo.next_element + baz = soup.find(string="baz") + assert comment == baz.previous_element + + def test_preserved_whitespace_in_pre_and_textarea(self): + """Whitespace must be preserved in
 and "
+        self.assert_soup(pre_markup)
+        self.assert_soup(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        assert soup.pre.prettify() == pre_markup
+
+        soup = self.soup(textarea_markup)
+        assert soup.textarea.prettify() == textarea_markup
+
+        soup = self.soup("")
+        assert soup.textarea.prettify() == ""
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "Inside a B tag"
+        self.assert_soup(b_tag)
+
+        nested_b_tag = "

A nested tag

" + self.assert_soup(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assert_soup(nested_b_tag) + + def test_nested_block_level_elements(self): + """Block elements can be nested.""" + soup = self.soup('

Foo

') + blockquote = soup.blockquote + assert blockquote.p.b.string == 'Foo' + assert blockquote.b.string == 'Foo' + + def test_correctly_nested_tables(self): + """One table can go inside another one.""" + markup = ('' + '' + "') + + self.assert_soup( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assert_soup( + "" + "" + "
Foo
Bar
Baz
") + + def test_multivalued_attribute_with_whitespace(self): + # Whitespace separating the values of a multi-valued attribute + # should be ignored. + + markup = '
' + soup = self.soup(markup) + assert ['foo', 'bar'] == soup.div['class'] + + # If you search by the literal name of the class it's like the whitespace + # wasn't there. + assert soup.div == soup.find('div', class_="foo bar") + + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + assert ["css"] == soup.div.div['class'] + + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + assert ["a", "b"] == soup.html['class'] + + def test_angle_brackets_in_attribute_values_are_escaped(self): + self.assert_soup('', '') + + def test_strings_resembling_character_entity_references(self): + # "&T" and "&p" look like incomplete character entities, but they are + # not. + self.assert_soup( + "

• AT&T is in the s&p 500

", + "

\u2022 AT&T is in the s&p 500

" + ) + + def test_apos_entity(self): + self.assert_soup( + "

Bob's Bar

", + "

Bob's Bar

", + ) + + def test_entities_in_foreign_document_encoding(self): + # “ and ” are invalid numeric entities referencing + # Windows-1252 characters. - references a character common + # to Windows-1252 and Unicode, and ☃ references a + # character only found in Unicode. + # + # All of these entities should be converted to Unicode + # characters. + markup = "

“Hello” -☃

" + soup = self.soup(markup) + assert "“Hello” -☃" == soup.p.string + + def test_entities_in_attributes_converted_to_unicode(self): + expect = '

' + self.assert_soup('

', expect) + self.assert_soup('

', expect) + self.assert_soup('

', expect) + self.assert_soup('

', expect) + + def test_entities_in_text_converted_to_unicode(self): + expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' + self.assert_soup("

piñata

", expect) + self.assert_soup("

piñata

", expect) + self.assert_soup("

piñata

", expect) + self.assert_soup("

piñata

", expect) + + def test_quot_entity_converted_to_quotation_mark(self): + self.assert_soup("

I said "good day!"

", + '

I said "good day!"

') + + def test_out_of_range_entity(self): + expect = "\N{REPLACEMENT CHARACTER}" + self.assert_soup("�", expect) + self.assert_soup("�", expect) + self.assert_soup("�", expect) + + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." + soup = self.soup("

\nfoo

") + assert "p" == soup.h2.string.next_element.name + assert "p" == soup.p.name + self.assertConnectedness(soup) + + def test_empty_element_tags(self): + """Verify consistent handling of empty-element tags, + no matter how they come in through the markup. + """ + self.assert_soup('


', "


") + self.assert_soup('


', "


") + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + assert soup.html.body is not None + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) + + def test_basic_namespaces(self): + """Parsers don't need to *understand* namespaces, but at the + very least they should not choke on namespaces or lose + data.""" + + markup = b'4' + soup = self.soup(markup) + assert markup == soup.encode() + html = soup.html + assert 'http://www.w3.org/1999/xhtml' == soup.html['xmlns'] + assert 'http://www.w3.org/1998/Math/MathML' == soup.html['xmlns:mathml'] + assert 'http://www.w3.org/2000/svg' == soup.html['xmlns:svg'] + + def test_multivalued_attribute_value_becomes_list(self): + markup = b'' + soup = self.soup(markup) + assert ['foo', 'bar'] == soup.a['class'] + + # + # Generally speaking, tests below this point are more tests of + # Beautiful Soup than tests of the tree builders. But parsers are + # weird, so we run these tests separately for every tree builder + # to detect any differences between them. + # + + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + assert 'Sacr\xe9 bleu!' 
== soup.body.string + + def test_soupstrainer(self): + """Parsers should be able to work with SoupStrainers.""" + strainer = SoupStrainer("b") + soup = self.soup("A bold statement", + parse_only=strainer) + assert soup.decode() == "bold" + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assert_soup("", + '') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """a""" + self.assert_soup(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """a""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assert_soup( + soup.foo.decode(), + """a""") + + def test_ampersand_in_attribute_value_gets_escaped(self): + self.assert_soup('', + '') + + self.assert_soup( + 'foo', + 'foo') + + def test_escaped_ampersand_in_attribute_value_is_left_alone(self): + self.assert_soup('') + + def test_entities_in_strings_converted_during_parsing(self): + # Both XML and HTML entities are converted to Unicode characters + # during parsing. + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" + self.assert_soup(text, expected) + + def test_smart_quotes_converted_on_the_way_in(self): + # Microsoft smart quotes are converted to Unicode characters during + # parsing. + quote = b"

\x91Foo\x92

" + soup = self.soup(quote) + assert soup.p.string == "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}" + + def test_non_breaking_spaces_converted_on_the_way_in(self): + soup = self.soup("  ") + assert soup.a.string == "\N{NO-BREAK SPACE}" * 2 + + def test_entities_converted_on_the_way_out(self): + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") + soup = self.soup(text) + assert soup.p.encode("utf-8") == expected + + def test_real_iso_latin_document(self): + # Smoke test of interrelated functionality, using an + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. + unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' + + # That's because we're going to encode it into ISO-Latin-1, and use + # that to test. + iso_latin_html = unicode_html.encode("iso-8859-1") + + # Parse the ISO-Latin-1 HTML. + soup = self.soup(iso_latin_html) + # Encode it to UTF-8. + result = soup.encode("utf-8") + + # What do we expect the result to look like? Well, it would + # look like unicode_html, except that the META tag would say + # UTF-8 instead of ISO-Latin-1. + expected = unicode_html.replace("ISO-Latin-1", "utf-8") + + # And, of course, it would be in UTF-8, not Unicode. + expected = expected.encode("utf-8") + + # Ta-da! + assert result == expected + + def test_real_shift_jis_document(self): + # Smoke test to make sure the parser can handle a document in + # Shift-JIS encoding, without choking. + shift_jis_html = ( + b'
'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'
') + unicode_html = shift_jis_html.decode("shift-jis") + soup = self.soup(unicode_html) + + # Make sure the parse tree is correctly encoded to various + # encodings. + assert soup.encode("utf-8") == unicode_html.encode("utf-8") + assert soup.encode("euc_jp") == unicode_html.encode("euc_jp") + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-9 (a + # Hebrew encoding) to UTF-8. + hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' + soup = self.soup( + hebrew_document, from_encoding="iso8859-8") + # Some tree builders call it iso8859-8, others call it iso-8859-9. + # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') + assert soup.encode('utf-8') == ( + hebrew_document.decode("iso8859-8").encode("utf-8") + ) + + def test_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) + content = parsed_meta['content'] + assert 'text/html; charset=x-sjis' == content + + # But that value is actually a ContentMetaAttributeValue object. + assert isinstance(content, ContentMetaAttributeValue) + + # And it will take on a value that reflects its current + # encoding. + assert 'text/html; charset=utf8' == content.encode("utf8") + + # For the rest of the story, see TestSubstitutions in + # test_tree.py. + + def test_html5_style_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', id="encoding") + charset = parsed_meta['charset'] + assert 'x-sjis' == charset + + # But that value is actually a CharsetMetaAttributeValue object. + assert isinstance(charset, CharsetMetaAttributeValue) + + # And it will take on a value that reflects its current + # encoding. 
+ assert 'utf8' == charset.encode("utf8") + + def test_python_specific_encodings_not_used_in_charset(self): + # You can encode an HTML document using a Python-specific + # encoding, but that encoding won't be mentioned _inside_ the + # resulting document. Instead, the document will appear to + # have no encoding. + for markup in [ + b'' + b'' + ]: + soup = self.soup(markup) + for encoding in PYTHON_SPECIFIC_ENCODINGS: + if encoding in ( + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' + ): + # For one reason or another, these will raise an + # exception if we actually try to use them, so don't + # bother. + continue + encoded = soup.encode(encoding) + assert b'meta charset=""' in encoded + assert encoding.encode("ascii") not in encoded + + def test_tag_with_no_attributes_can_have_attributes_added(self): + data = self.soup("text") + data.a['foo'] = 'bar' + assert 'text' == data.a.decode() + + def test_closing_tag_with_no_opening_tag(self): + # Without BeautifulSoup.open_tag_counter, the tag will + # cause _popToTag to be called over and over again as we look + # for a tag that wasn't there. The result is that 'text2' + # will show up outside the body of the document. + soup = self.soup("

text1

text2
") + assert "

text1

text2
" == soup.body.decode() + + def test_worst_case(self): + """Test the worst case (currently) for linking issues.""" + + soup = self.soup(BAD_DOCUMENT) + self.linkage_validator(soup) + + +class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + assert loaded.__class__ == BeautifulSoup + assert loaded.decode() == tree.decode() + + def test_docstring_generated(self): + soup = self.soup("") + assert soup.encode() == b'\n' + + def test_xml_declaration(self): + markup = b"""\n""" + soup = self.soup(markup) + assert markup == soup.encode("utf8") + + def test_python_specific_encodings_not_used_in_xml_declaration(self): + # You can encode an XML document using a Python-specific + # encoding, but that encoding won't be mentioned _inside_ the + # resulting document. + markup = b"""\n""" + soup = self.soup(markup) + for encoding in PYTHON_SPECIFIC_ENCODINGS: + if encoding in ( + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' + ): + # For one reason or another, these will raise an + # exception if we actually try to use them, so don't + # bother. + continue + encoded = soup.encode(encoding) + assert b'' in encoded + assert encoding.encode("ascii") not in encoded + + def test_processing_instruction(self): + markup = b"""\n""" + soup = self.soup(markup) + assert markup == soup.encode("utf8") + + def test_real_xhtml_document(self): + """A real XHTML document should come out *exactly* the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. 
+""" + soup = self.soup(markup) + assert soup.encode("utf-8") == markup + + def test_nested_namespaces(self): + doc = b""" + + + + + +""" + soup = self.soup(doc) + assert doc == soup.encode() + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + +""" + soup = BeautifulSoup(doc, "lxml-xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + assert b"< < hey > >" in encoded + + def test_can_parse_unicode_document(self): + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + assert 'Sacr\xe9 bleu!' == soup.root.string + + def test_can_parse_unicode_document_begining_with_bom(self): + markup = '\N{BYTE ORDER MARK}Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + assert 'Sacr\xe9 bleu!' == soup.root.string + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + assert str(soup.rss) == markup + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("") + assert soup.encode("latin1") == b'\n' + + def test_large_xml_document(self): + """A large XML document should come out the same as it went in.""" + markup = (b'\n' + + b'0' * (2**12) + + b'') + soup = self.soup(markup) + assert soup.encode("utf-8") == markup + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assert_soup("

", "

") + self.assert_soup("

foo

") + + def test_namespaces_are_preserved(self): + markup = 'This tag is in the a namespaceThis tag is in the b namespace' + soup = self.soup(markup) + root = soup.root + assert "http://example.com/" == root['xmlns:a'] + assert "http://example.net/" == root['xmlns:b'] + + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + assert str(soup.p) == markup + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + assert str(soup.foo) == markup + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + assert str(soup.foo) == markup + + def test_find_by_prefixed_name(self): + doc = """ +foo + bar + baz + +""" + soup = self.soup(doc) + + # There are three tags. + assert 3 == len(soup.find_all('tag')) + + # But two of them are ns1:tag and one of them is ns2:tag. + assert 2 == len(soup.find_all('ns1:tag')) + assert 1 == len(soup.find_all('ns2:tag')) + + assert 1, len(soup.find_all('ns2:tag', key='value')) + assert 3, len(soup.find_all(['ns1:tag', 'ns2:tag'])) + + def test_copy_tag_preserves_namespace(self): + xml = """ +""" + + soup = self.soup(xml) + tag = soup.document + duplicate = copy.copy(tag) + + # The two tags have the same namespace prefix. + assert tag.prefix == duplicate.prefix + + def test_worst_case(self): + """Test the worst case (currently) for linking issues.""" + + soup = self.soup(BAD_DOCUMENT) + self.linkage_validator(soup) + + +class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): + """Smoke test for a tree builder that supports HTML5.""" + + def test_real_xhtml_document(self): + # Since XHTML is not HTML5, HTML5 parsers are not tested to handle + # XHTML documents in any particular way. 
+ pass + + def test_html_tags_have_namespace(self): + markup = "" + soup = self.soup(markup) + assert "http://www.w3.org/1999/xhtml" == soup.a.namespace + + def test_svg_tags_have_namespace(self): + markup = '' + soup = self.soup(markup) + namespace = "http://www.w3.org/2000/svg" + assert namespace == soup.svg.namespace + assert namespace == soup.circle.namespace + + + def test_mathml_tags_have_namespace(self): + markup = '5' + soup = self.soup(markup) + namespace = 'http://www.w3.org/1998/Math/MathML' + assert namespace == soup.math.namespace + assert namespace == soup.msqrt.namespace + + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + assert isinstance(soup.contents[0], Comment) + assert soup.contents[0] == '?xml version="1.0" encoding="utf-8"?' + assert "html" == soup.contents[0].next_element.name + +def skipIf(condition, reason): + def nothing(test, *args, **kwargs): + return None + + def decorator(test_item): + if condition: + return nothing + else: + return test_item + + return decorator diff --git a/libs/common/bs4/tests/test_builder.py b/libs/common/bs4/tests/test_builder.py new file mode 100644 index 00000000..75370712 --- /dev/null +++ b/libs/common/bs4/tests/test_builder.py @@ -0,0 +1,29 @@ +import pytest +from unittest.mock import patch +from bs4.builder import DetectsXMLParsedAsHTML + +class TestDetectsXMLParsedAsHTML(object): + + @pytest.mark.parametrize( + "markup,looks_like_xml", + [("No xml declaration", False), + ("obviously HTMLActually XHTML", False), + (" < html>Tricky XHTML", False), + ("", True), + ] + ) + def test_warn_if_markup_looks_like_xml(self, markup, looks_like_xml): + # Test of our ability to guess at whether markup looks XML-ish + # _and_ not HTML-ish. 
+ with patch('bs4.builder.DetectsXMLParsedAsHTML._warn') as mock: + for data in markup, markup.encode('utf8'): + result = DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml( + data + ) + assert result == looks_like_xml + if looks_like_xml: + assert mock.called + else: + assert not mock.called + mock.reset_mock() diff --git a/libs/common/bs4/tests/test_builder_registry.py b/libs/common/bs4/tests/test_builder_registry.py index 90cad829..5fa874c8 100644 --- a/libs/common/bs4/tests/test_builder_registry.py +++ b/libs/common/bs4/tests/test_builder_registry.py @@ -1,6 +1,6 @@ """Tests of the builder registry.""" -import unittest +import pytest import warnings from bs4 import BeautifulSoup @@ -26,46 +26,36 @@ except ImportError: LXML_PRESENT = False -class BuiltInRegistryTest(unittest.TestCase): +class TestBuiltInRegistry(object): """Test the built-in registry with the default builders registered.""" def test_combination(self): + assert registry.lookup('strict', 'html') == HTMLParserTreeBuilder if LXML_PRESENT: - self.assertEqual(registry.lookup('fast', 'html'), - LXMLTreeBuilder) - - if LXML_PRESENT: - self.assertEqual(registry.lookup('permissive', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('strict', 'html'), - HTMLParserTreeBuilder) + assert registry.lookup('fast', 'html') == LXMLTreeBuilder + assert registry.lookup('permissive', 'xml') == LXMLTreeBuilderForXML if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib', 'html'), - HTML5TreeBuilder) + assert registry.lookup('html5lib', 'html') == HTML5TreeBuilder def test_lookup_by_markup_type(self): if LXML_PRESENT: - self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) - self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) + assert registry.lookup('html') == LXMLTreeBuilder + assert registry.lookup('xml') == LXMLTreeBuilderForXML else: - self.assertEqual(registry.lookup('xml'), None) + assert registry.lookup('xml') == None if HTML5LIB_PRESENT: - 
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) + assert registry.lookup('html') == HTML5TreeBuilder else: - self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) + assert registry.lookup('html') == HTMLParserTreeBuilder def test_named_library(self): if LXML_PRESENT: - self.assertEqual(registry.lookup('lxml', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('lxml', 'html'), - LXMLTreeBuilder) + assert registry.lookup('lxml', 'xml') == LXMLTreeBuilderForXML + assert registry.lookup('lxml', 'html') == LXMLTreeBuilder if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib'), - HTML5TreeBuilder) + assert registry.lookup('html5lib') == HTML5TreeBuilder - self.assertEqual(registry.lookup('html.parser'), - HTMLParserTreeBuilder) + assert registry.lookup('html.parser') == HTMLParserTreeBuilder def test_beautifulsoup_constructor_does_lookup(self): @@ -77,16 +67,17 @@ class BuiltInRegistryTest(unittest.TestCase): BeautifulSoup("", features="html") # Or a list of strings. BeautifulSoup("", features=["html", "fast"]) - + pass + # You'll get an exception if BS can't find an appropriate # builder. - self.assertRaises(ValueError, BeautifulSoup, - "", features="no-such-feature") + with pytest.raises(ValueError): + BeautifulSoup("", features="no-such-feature") -class RegistryTest(unittest.TestCase): +class TestRegistry(object): """Test the TreeBuilderRegistry class in general.""" - def setUp(self): + def setup_method(self): self.registry = TreeBuilderRegistry() def builder_for_features(self, *feature_list): @@ -101,28 +92,28 @@ class RegistryTest(unittest.TestCase): # Since the builder advertises no features, you can't find it # by looking up features. - self.assertEqual(self.registry.lookup('foo'), None) + assert self.registry.lookup('foo') is None # But you can find it by doing a lookup with no features, if # this happens to be the only registered builder. 
- self.assertEqual(self.registry.lookup(), builder) + assert self.registry.lookup() == builder def test_register_with_features_makes_lookup_succeed(self): builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('foo'), builder) - self.assertEqual(self.registry.lookup('bar'), builder) + assert self.registry.lookup('foo') is builder + assert self.registry.lookup('bar') is builder def test_lookup_fails_when_no_builder_implements_feature(self): builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('baz'), None) + assert self.registry.lookup('baz') is None def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): builder1 = self.builder_for_features('foo') builder2 = self.builder_for_features('bar') - self.assertEqual(self.registry.lookup(), builder2) + assert self.registry.lookup() == builder2 def test_lookup_fails_when_no_tree_builders_registered(self): - self.assertEqual(self.registry.lookup(), None) + assert self.registry.lookup() is None def test_lookup_gets_most_recent_builder_supporting_all_features(self): has_one = self.builder_for_features('foo') @@ -134,14 +125,12 @@ class RegistryTest(unittest.TestCase): # There are two builders featuring 'foo' and 'bar', but # the one that also features 'quux' was registered later. - self.assertEqual(self.registry.lookup('foo', 'bar'), - has_both_late) + assert self.registry.lookup('foo', 'bar') == has_both_late # There is only one builder featuring 'foo', 'bar', and 'baz'. 
- self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), - has_both_early) + assert self.registry.lookup('foo', 'bar', 'baz') == has_both_early def test_lookup_fails_when_cannot_reconcile_requested_features(self): builder1 = self.builder_for_features('foo', 'bar') builder2 = self.builder_for_features('foo', 'baz') - self.assertEqual(self.registry.lookup('bar', 'baz'), None) + assert self.registry.lookup('bar', 'baz') is None diff --git a/libs/common/bs4/tests/test_dammit.py b/libs/common/bs4/tests/test_dammit.py new file mode 100644 index 00000000..9971234e --- /dev/null +++ b/libs/common/bs4/tests/test_dammit.py @@ -0,0 +1,371 @@ +# encoding: utf-8 +import pytest +import logging +import bs4 +from bs4 import BeautifulSoup +from bs4.dammit import ( + EntitySubstitution, + EncodingDetector, + UnicodeDammit, +) + +class TestUnicodeDammit(object): + """Standalone tests of UnicodeDammit.""" + + def test_unicode_input(self): + markup = "I'm already Unicode! \N{SNOWMAN}" + dammit = UnicodeDammit(markup) + assert dammit.unicode_markup == markup + + def test_smart_quotes_to_unicode(self): + markup = b"\x91\x92\x93\x94" + dammit = UnicodeDammit(markup) + assert dammit.unicode_markup == "\u2018\u2019\u201c\u201d" + + def test_smart_quotes_to_xml_entities(self): + markup = b"\x91\x92\x93\x94" + dammit = UnicodeDammit(markup, smart_quotes_to="xml") + assert dammit.unicode_markup == "‘’“”" + + def test_smart_quotes_to_html_entities(self): + markup = b"\x91\x92\x93\x94" + dammit = UnicodeDammit(markup, smart_quotes_to="html") + assert dammit.unicode_markup == "‘’“”" + + def test_smart_quotes_to_ascii(self): + markup = b"\x91\x92\x93\x94" + dammit = UnicodeDammit(markup, smart_quotes_to="ascii") + assert dammit.unicode_markup == """''""""" + + def test_detect_utf8(self): + utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83" + dammit = UnicodeDammit(utf8) + assert dammit.original_encoding.lower() == 'utf-8' + assert dammit.unicode_markup == 'Sacr\xe9 bleu! 
\N{SNOWMAN}' + + def test_convert_hebrew(self): + hebrew = b"\xed\xe5\xec\xf9" + dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) + assert dammit.original_encoding.lower() == 'iso-8859-8' + assert dammit.unicode_markup == '\u05dd\u05d5\u05dc\u05e9' + + def test_dont_see_smart_quotes_where_there_are_none(self): + utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" + dammit = UnicodeDammit(utf_8) + assert dammit.original_encoding.lower() == 'utf-8' + assert dammit.unicode_markup.encode("utf-8") == utf_8 + + def test_ignore_inappropriate_codecs(self): + utf8_data = "Räksmörgås".encode("utf-8") + dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) + assert dammit.original_encoding.lower() == 'utf-8' + + def test_ignore_invalid_codecs(self): + utf8_data = "Räksmörgås".encode("utf-8") + for bad_encoding in ['.utf8', '...', 'utF---16.!']: + dammit = UnicodeDammit(utf8_data, [bad_encoding]) + assert dammit.original_encoding.lower() == 'utf-8' + + def test_exclude_encodings(self): + # This is UTF-8. + utf8_data = "Räksmörgås".encode("utf-8") + + # But if we exclude UTF-8 from consideration, the guess is + # Windows-1252. + dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"]) + assert dammit.original_encoding.lower() == 'windows-1252' + + # And if we exclude that, there is no valid guess at all. 
+ dammit = UnicodeDammit( + utf8_data, exclude_encodings=["utf-8", "windows-1252"]) + assert dammit.original_encoding == None + +class TestEncodingDetector(object): + + def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self): + detected = EncodingDetector( + b'') + encodings = list(detected.encodings) + assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings + + def test_detect_html5_style_meta_tag(self): + + for data in ( + b'', + b"", + b"", + b""): + dammit = UnicodeDammit(data, is_html=True) + assert "euc-jp" == dammit.original_encoding + + def test_last_ditch_entity_replacement(self): + # This is a UTF-8 document that contains bytestrings + # completely incompatible with UTF-8 (ie. encoded with some other + # encoding). + # + # Since there is no consistent encoding for the document, + # Unicode, Dammit will eventually encode the document as UTF-8 + # and encode the incompatible characters as REPLACEMENT + # CHARACTER. + # + # If chardet is installed, it will detect that the document + # can be converted into ISO-8859-1 without errors. This happens + # to be the wrong encoding, but it is a consistent encoding, so the + # code we're testing here won't run. + # + # So we temporarily disable chardet if it's present. + doc = b"""\357\273\277 +\330\250\330\252\330\261 +\310\322\321\220\312\321\355\344""" + chardet = bs4.dammit.chardet_dammit + logging.disable(logging.WARNING) + try: + def noop(str): + return None + bs4.dammit.chardet_dammit = noop + dammit = UnicodeDammit(doc) + assert True == dammit.contains_replacement_characters + assert "\ufffd" in dammit.unicode_markup + + soup = BeautifulSoup(doc, "html.parser") + assert soup.contains_replacement_characters + finally: + logging.disable(logging.NOTSET) + bs4.dammit.chardet_dammit = chardet + + def test_byte_order_mark_removed(self): + # A document written in UTF-16LE will have its byte order marker stripped. 
+ data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' + dammit = UnicodeDammit(data) + assert "áé" == dammit.unicode_markup + assert "utf-16le" == dammit.original_encoding + + def test_known_definite_versus_user_encodings(self): + # The known_definite_encodings are used before sniffing the + # byte-order mark; the user_encodings are used afterwards. + + # Here's a document in UTF-16LE. + data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' + dammit = UnicodeDammit(data) + + # We can process it as UTF-16 by passing it in as a known + # definite encoding. + before = UnicodeDammit(data, known_definite_encodings=["utf-16"]) + assert "utf-16" == before.original_encoding + + # If we pass UTF-18 as a user encoding, it's not even + # tried--the encoding sniffed from the byte-order mark takes + # precedence. + after = UnicodeDammit(data, user_encodings=["utf-8"]) + assert "utf-16le" == after.original_encoding + assert ["utf-16le"] == [x[0] for x in dammit.tried_encodings] + + # Here's a document in ISO-8859-8. + hebrew = b"\xed\xe5\xec\xf9" + dammit = UnicodeDammit(hebrew, known_definite_encodings=["utf-8"], + user_encodings=["iso-8859-8"]) + + # The known_definite_encodings don't work, BOM sniffing does + # nothing (it only works for a few UTF encodings), but one of + # the user_encodings does work. + assert "iso-8859-8" == dammit.original_encoding + assert ["utf-8", "iso-8859-8"] == [x[0] for x in dammit.tried_encodings] + + def test_deprecated_override_encodings(self): + # override_encodings is a deprecated alias for + # known_definite_encodings. + hebrew = b"\xed\xe5\xec\xf9" + dammit = UnicodeDammit( + hebrew, + known_definite_encodings=["shift-jis"], + override_encodings=["utf-8"], + user_encodings=["iso-8859-8"], + ) + assert "iso-8859-8" == dammit.original_encoding + + # known_definite_encodings and override_encodings were tried + # before user_encodings. 
+ assert ["shift-jis", "utf-8", "iso-8859-8"] == ( + [x[0] for x in dammit.tried_encodings] + ) + + def test_detwingle(self): + # Here's a UTF8 document. + utf8 = ("\N{SNOWMAN}" * 3).encode("utf8") + + # Here's a Windows-1252 document. + windows_1252 = ( + "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" + "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") + + # Through some unholy alchemy, they've been stuck together. + doc = utf8 + windows_1252 + utf8 + + # The document can't be turned into UTF-8: + with pytest.raises(UnicodeDecodeError): + doc.decode("utf8") + + # Unicode, Dammit thinks the whole document is Windows-1252, + # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃" + + # But if we run it through fix_embedded_windows_1252, it's fixed: + fixed = UnicodeDammit.detwingle(doc) + assert "☃☃☃“Hi, I like Windows!”☃☃☃" == fixed.decode("utf8") + + def test_detwingle_ignores_multibyte_characters(self): + # Each of these characters has a UTF-8 representation ending + # in \x93. \x93 is a smart quote if interpreted as + # Windows-1252. But our code knows to skip over multibyte + # UTF-8 characters, so they'll survive the process unscathed. + for tricky_unicode_char in ( + "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' + "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' + "\xf0\x90\x90\x93", # This is a CJK character, not sure which one. + ): + input = tricky_unicode_char.encode("utf8") + assert input.endswith(b'\x93') + output = UnicodeDammit.detwingle(input) + assert output == input + + def test_find_declared_encoding(self): + # Test our ability to find a declared encoding inside an + # XML or HTML document. + # + # Even if the document comes in as Unicode, it may be + # interesting to know what encoding was claimed + # originally. 
+ + html_unicode = '' + html_bytes = html_unicode.encode("ascii") + + xml_unicode= '' + xml_bytes = xml_unicode.encode("ascii") + + m = EncodingDetector.find_declared_encoding + assert m(html_unicode, is_html=False) is None + assert "utf-8" == m(html_unicode, is_html=True) + assert "utf-8" == m(html_bytes, is_html=True) + + assert "iso-8859-1" == m(xml_unicode) + assert "iso-8859-1" == m(xml_bytes) + + # Normally, only the first few kilobytes of a document are checked for + # an encoding. + spacer = b' ' * 5000 + assert m(spacer + html_bytes) is None + assert m(spacer + xml_bytes) is None + + # But you can tell find_declared_encoding to search an entire + # HTML document. + assert ( + m(spacer + html_bytes, is_html=True, search_entire_document=True) + == "utf-8" + ) + + # The XML encoding declaration has to be the very first thing + # in the document. We'll allow whitespace before the document + # starts, but nothing else. + assert m(xml_bytes, search_entire_document=True) == "iso-8859-1" + assert m(b' ' + xml_bytes, search_entire_document=True) == "iso-8859-1" + assert m(b'a' + xml_bytes, search_entire_document=True) is None + + +class TestEntitySubstitution(object): + """Standalone tests of the EntitySubstitution class.""" + def setup_method(self): + self.sub = EntitySubstitution + + def test_simple_html_substitution(self): + # Unicode characters corresponding to named HTML entites + # are substituted, and no others. + s = "foo\u2200\N{SNOWMAN}\u00f5bar" + assert self.sub.substitute_html(s) == "foo∀\N{SNOWMAN}õbar" + + def test_smart_quote_substitution(self): + # MS smart quotes are a common source of frustration, so we + # give them a special test. + quotes = b"\x91\x92foo\x93\x94" + dammit = UnicodeDammit(quotes) + assert self.sub.substitute_html(dammit.markup) == "‘’foo“”" + + def test_html5_entity(self): + # Some HTML5 entities correspond to single- or multi-character + # Unicode sequences. 
+ + for entity, u in ( + # A few spot checks of our ability to recognize + # special character sequences and convert them + # to named entities. + ('⊧', '\u22a7'), + ('𝔑', '\U0001d511'), + ('≧̸', '\u2267\u0338'), + ('¬', '\xac'), + ('⫬', '\u2aec'), + + # We _could_ convert | to &verbarr;, but we don't, because + # | is an ASCII character. + ('|' '|'), + + # Similarly for the fj ligature, which we could convert to + # fj, but we don't. + ("fj", "fj"), + + # We do convert _these_ ASCII characters to HTML entities, + # because that's required to generate valid HTML. + ('>', '>'), + ('<', '<'), + ('&', '&'), + ): + template = '3 %s 4' + raw = template % u + with_entities = template % entity + assert self.sub.substitute_html(raw) == with_entities + + def test_html5_entity_with_variation_selector(self): + # Some HTML5 entities correspond either to a single-character + # Unicode sequence _or_ to the same character plus U+FE00, + # VARIATION SELECTOR 1. We can handle this. + data = "fjords \u2294 penguins" + markup = "fjords ⊔ penguins" + assert self.sub.substitute_html(data) == markup + + data = "fjords \u2294\ufe00 penguins" + markup = "fjords ⊔︀ penguins" + assert self.sub.substitute_html(data) == markup + + def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self): + s = 'Welcome to "my bar"' + assert self.sub.substitute_xml(s, False) == s + + def test_xml_attribute_quoting_normally_uses_double_quotes(self): + assert self.sub.substitute_xml("Welcome", True) == '"Welcome"' + assert self.sub.substitute_xml("Bob's Bar", True) == '"Bob\'s Bar"' + + def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self): + s = 'Welcome to "my bar"' + assert self.sub.substitute_xml(s, True) == "'Welcome to \"my bar\"'" + + def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self): + s = 'Welcome to "Bob\'s Bar"' + assert self.sub.substitute_xml(s, True) == '"Welcome to "Bob\'s 
Bar""' + + def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self): + quoted = 'Welcome to "Bob\'s Bar"' + assert self.sub.substitute_xml(quoted) == quoted + + def test_xml_quoting_handles_angle_brackets(self): + assert self.sub.substitute_xml("foo") == "foo<bar>" + + def test_xml_quoting_handles_ampersands(self): + assert self.sub.substitute_xml("AT&T") == "AT&T" + + def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self): + assert self.sub.substitute_xml("ÁT&T") == "&Aacute;T&T" + + def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self): + assert self.sub.substitute_xml_containing_entities("ÁT&T") == "ÁT&T" + + def test_quotes_not_html_substituted(self): + """There's no need to do this except inside attribute values.""" + text = 'Bob\'s "bar"' + assert self.sub.substitute_html(text) == text diff --git a/libs/common/bs4/tests/test_docs.py b/libs/common/bs4/tests/test_docs.py index 5b9f6770..0194d697 100644 --- a/libs/common/bs4/tests/test_docs.py +++ b/libs/common/bs4/tests/test_docs.py @@ -1,5 +1,7 @@ "Test harness for doctests." +# TODO: Pretty sure this isn't used and should be deleted. + # pylint: disable-msg=E0611,W0142 __metaclass__ = type diff --git a/libs/common/bs4/tests/test_element.py b/libs/common/bs4/tests/test_element.py new file mode 100644 index 00000000..6d08ab5d --- /dev/null +++ b/libs/common/bs4/tests/test_element.py @@ -0,0 +1,74 @@ +"""Tests of classes in element.py. + +The really big classes -- Tag, PageElement, and NavigableString -- +are tested in separate files. +""" + +from bs4.element import ( + CharsetMetaAttributeValue, + ContentMetaAttributeValue, + NamespacedAttribute, +) +from . 
import SoupTest + + +class TestNamedspacedAttribute(object): + + def test_name_may_be_none_or_missing(self): + a = NamespacedAttribute("xmlns", None) + assert a == "xmlns" + + a = NamespacedAttribute("xmlns", "") + assert a == "xmlns" + + a = NamespacedAttribute("xmlns") + assert a == "xmlns" + + def test_namespace_may_be_none_or_missing(self): + a = NamespacedAttribute(None, "tag") + assert a == "tag" + + a = NamespacedAttribute("", "tag") + assert a == "tag" + + def test_attribute_is_equivalent_to_colon_separated_string(self): + a = NamespacedAttribute("a", "b") + assert "a:b" == a + + def test_attributes_are_equivalent_if_prefix_and_name_identical(self): + a = NamespacedAttribute("a", "b", "c") + b = NamespacedAttribute("a", "b", "c") + assert a == b + + # The actual namespace is not considered. + c = NamespacedAttribute("a", "b", None) + assert a == c + + # But name and prefix are important. + d = NamespacedAttribute("a", "z", "c") + assert a != d + + e = NamespacedAttribute("z", "b", "c") + assert a != e + + +class TestAttributeValueWithCharsetSubstitution(object): + """Certain attributes are designed to have the charset of the + final document substituted into their value. + """ + + def test_content_meta_attribute_value(self): + # The value of a CharsetMetaAttributeValue is whatever + # encoding the string is in. 
+ value = CharsetMetaAttributeValue("euc-jp") + assert "euc-jp" == value + assert "euc-jp" == value.original_value + assert "utf8" == value.encode("utf8") + assert "ascii" == value.encode("ascii") + + def test_content_meta_attribute_value(self): + value = ContentMetaAttributeValue("text/html; charset=euc-jp") + assert "text/html; charset=euc-jp" == value + assert "text/html; charset=euc-jp" == value.original_value + assert "text/html; charset=utf8" == value.encode("utf8") + assert "text/html; charset=ascii" == value.encode("ascii") diff --git a/libs/common/bs4/tests/test_formatter.py b/libs/common/bs4/tests/test_formatter.py new file mode 100644 index 00000000..84d4e3b2 --- /dev/null +++ b/libs/common/bs4/tests/test_formatter.py @@ -0,0 +1,113 @@ +import pytest + +from bs4.element import Tag +from bs4.formatter import ( + Formatter, + HTMLFormatter, + XMLFormatter, +) +from . import SoupTest + +class TestFormatter(SoupTest): + + def test_default_attributes(self): + # Test the default behavior of Formatter.attributes(). + formatter = Formatter() + tag = Tag(name="tag") + tag['b'] = 1 + tag['a'] = 2 + + # Attributes come out sorted by name. In Python 3, attributes + # normally come out of a dictionary in the order they were + # added. + assert [('a', 2), ('b', 1)] == formatter.attributes(tag) + + # This works even if Tag.attrs is None, though this shouldn't + # normally happen. + tag.attrs = None + assert [] == formatter.attributes(tag) + + assert ' ' == formatter.indent + + def test_sort_attributes(self): + # Test the ability to override Formatter.attributes() to, + # e.g., disable the normal sorting of attributes. + class UnsortedFormatter(Formatter): + def attributes(self, tag): + self.called_with = tag + for k, v in sorted(tag.attrs.items()): + if k == 'ignore': + continue + yield k,v + + soup = self.soup('

') + formatter = UnsortedFormatter() + decoded = soup.decode(formatter=formatter) + + # attributes() was called on the

tag. It filtered out one + # attribute and sorted the other two. + assert formatter.called_with == soup.p + assert '

' == decoded + + def test_empty_attributes_are_booleans(self): + # Test the behavior of empty_attributes_are_booleans as well + # as which Formatters have it enabled. + + for name in ('html', 'minimal', None): + formatter = HTMLFormatter.REGISTRY[name] + assert False == formatter.empty_attributes_are_booleans + + formatter = XMLFormatter.REGISTRY[None] + assert False == formatter.empty_attributes_are_booleans + + formatter = HTMLFormatter.REGISTRY['html5'] + assert True == formatter.empty_attributes_are_booleans + + # Verify that the constructor sets the value. + formatter = Formatter(empty_attributes_are_booleans=True) + assert True == formatter.empty_attributes_are_booleans + + # Now demonstrate what it does to markup. + for markup in ( + "", + '' + ): + soup = self.soup(markup) + for formatter in ('html', 'minimal', 'xml', None): + assert b'' == soup.option.encode(formatter='html') + assert b'' == soup.option.encode(formatter='html5') + + @pytest.mark.parametrize( + "indent,expect", + [ + (None, '\n\ntext\n\n'), + (-1, '\n\ntext\n\n'), + (0, '\n\ntext\n\n'), + ("", '\n\ntext\n\n'), + + (1, '\n \n text\n \n'), + (2, '\n \n text\n \n'), + + ("\t", '\n\t\n\t\ttext\n\t\n'), + ('abc', '\nabc\nabcabctext\nabc\n'), + + # Some invalid inputs -- the default behavior is used. + (object(), '\n \n text\n \n'), + (b'bytes', '\n \n text\n \n'), + ] + ) + def test_indent(self, indent, expect): + # Pretty-print a tree with a Formatter set to + # indent in a certain way and verify the results. + soup = self.soup("text") + formatter = Formatter(indent=indent) + assert soup.prettify(formatter=formatter) == expect + + # Pretty-printing only happens with prettify(), not + # encode(). 
+ assert soup.encode(formatter=formatter) != expect + + def test_default_indent_value(self): + formatter = Formatter() + assert formatter.indent == ' ' + diff --git a/libs/common/bs4/tests/test_html5lib.py b/libs/common/bs4/tests/test_html5lib.py index 81fb7d3b..b32ab304 100644 --- a/libs/common/bs4/tests/test_html5lib.py +++ b/libs/common/bs4/tests/test_html5lib.py @@ -8,7 +8,7 @@ try: except ImportError as e: HTML5LIB_PRESENT = False from bs4.element import SoupStrainer -from bs4.testing import ( +from . import ( HTML5TreeBuilderSmokeTest, SoupTest, skipIf, @@ -17,12 +17,12 @@ from bs4.testing import ( @skipIf( not HTML5LIB_PRESENT, "html5lib seems not to be present, not testing its tree builder.") -class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): +class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest): """See ``HTML5TreeBuilderSmokeTest``.""" @property def default_builder(self): - return HTML5TreeBuilder() + return HTML5TreeBuilder def test_soupstrainer(self): # The html5lib tree builder does not support SoupStrainers. @@ -30,12 +30,9 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): markup = "

A bold statement.

" with warnings.catch_warnings(record=True) as w: soup = self.soup(markup, parse_only=strainer) - self.assertEqual( - soup.decode(), self.document_for(markup)) + assert soup.decode() == self.document_for(markup) - self.assertTrue( - "the html5lib tree builder doesn't support parse_only" in - str(w[0].message)) + assert "the html5lib tree builder doesn't support parse_only" in str(w[0].message) def test_correctly_nested_tables(self): """html5lib inserts tags where other parsers don't.""" @@ -46,13 +43,13 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): 'foo' '') - self.assertSoupEquals( + self.assert_soup( markup, '
Here\'s another table:' '
foo
' '
') - self.assertSoupEquals( + self.assert_soup( "" "" "
Foo
Bar
Baz
") @@ -69,20 +66,20 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): ''' soup = self.soup(markup) # Verify that we can reach the

tag; this means the tree is connected. - self.assertEqual(b"

foo

", soup.p.encode()) + assert b"

foo

" == soup.p.encode() def test_reparented_markup(self): markup = '

foo

\n

bar

' soup = self.soup(markup) - self.assertEqual("

foo

\n

bar

", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) + assert "

foo

\n

bar

" == soup.body.decode() + assert 2 == len(soup.find_all('p')) def test_reparented_markup_ends_with_whitespace(self): markup = '

foo

\n

bar

\n' soup = self.soup(markup) - self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) + assert "

foo

\n

bar

\n" == soup.body.decode() + assert 2 == len(soup.find_all('p')) def test_reparented_markup_containing_identical_whitespace_nodes(self): """Verify that we keep the two whitespace nodes in this @@ -99,7 +96,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): markup = '' soup = self.soup(markup) noscript = soup.noscript - self.assertEqual("target", noscript.next_element) + assert "target" == noscript.next_element target = soup.find(string='target') # The 'aftermath' string was duplicated; we want the second one. @@ -108,8 +105,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): # The