Move common libs to libs/common

This commit is contained in:
Labrys of Knossos 2018-12-16 13:30:24 -05:00
commit 1f4bd41bcc
1612 changed files with 962 additions and 10 deletions

View file

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""DBCore is an abstract database package that forms the basis for beets'
Library.
"""
from __future__ import division, absolute_import, print_function
from .db import Model, Database
from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery
from .types import Type
from .queryparse import query_from_strings
from .queryparse import sort_from_strings
from .queryparse import parse_sorted_query
from .query import InvalidQueryError
# flake8: noqa

View file

@ -0,0 +1,910 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""The central Model and Database constructs for DBCore.
"""
from __future__ import division, absolute_import, print_function
import time
import os
from collections import defaultdict
import threading
import sqlite3
import contextlib
import collections
import beets
from beets.util.functemplate import Template
from beets.util import py3_path
from beets.dbcore import types
from .query import MatchQuery, NullSort, TrueQuery
import six
class DBAccessError(Exception):
    """Raised when the SQLite database can no longer be accessed.

    Reading from or writing to the database can fail this way when, for
    instance, the database file has been deleted or has otherwise
    disappeared. Recovering from this error is probably not possible.
    """
    pass
# `collections.Mapping` was removed in Python 3.10; the ABC lives in
# `collections.abc` on Python 3. Fall back to `collections` on Python 2.
try:
    from collections.abc import Mapping as _Mapping
except ImportError:  # Python 2
    from collections import Mapping as _Mapping


class FormattedMapping(_Mapping):
    """A `dict`-like formatted view of a model.

    The accessor `mapping[key]` returns the formatted version of
    `model[key]` as a unicode string.

    If `for_path` is true, all path separators in the formatted values
    are replaced.
    """

    def __init__(self, model, for_path=False):
        """Wrap `model`, snapshotting its key list (including computed
        fields) at construction time.
        """
        self.for_path = for_path
        self.model = model
        self.model_keys = model.keys(True)

    def __getitem__(self, key):
        """Return the formatted value for `key`; raise KeyError if the
        key was not among the model's keys at construction time.
        """
        if key in self.model_keys:
            return self._get_formatted(self.model, key)
        else:
            raise KeyError(key)

    def __iter__(self):
        return iter(self.model_keys)

    def __len__(self):
        return len(self.model_keys)

    def get(self, key, default=None):
        """Return the formatted value for `key`.

        When the key is missing and no `default` is given, the result is
        the field type's formatting of `None` rather than `None` itself.
        """
        if default is None:
            default = self.model._type(key).format(None)
        return super(FormattedMapping, self).get(key, default)

    def _get_formatted(self, model, key):
        """Format `model`'s value for `key` using the field's type.

        Bytes results are decoded as UTF-8 (ignoring errors). When
        `for_path` is set, every path separator in the result is replaced
        by the configured `path_sep_replace` string.
        """
        value = model._type(key).format(model.get(key))
        if isinstance(value, bytes):
            value = value.decode('utf-8', 'ignore')

        if self.for_path:
            sep_repl = beets.config['path_sep_replace'].as_str()
            for sep in (os.path.sep, os.path.altsep):
                # `os.path.altsep` is None on platforms without one.
                if sep:
                    value = value.replace(sep, sep_repl)

        return value
# Abstract base for model classes.
class Model(object):
    """An abstract object representing an object in the database. Model
    objects act like dictionaries (i.e., they allow subscript access like
    ``obj['field']``). The same field set is available via attribute
    access as a shortcut (i.e., ``obj.field``). Three kinds of attributes are
    available:

    * **Fixed attributes** come from a predetermined list of field
      names. These fields correspond to SQLite table columns and are
      thus fast to read, write, and query.
    * **Flexible attributes** are free-form and do not need to be listed
      ahead of time.
    * **Computed attributes** are read-only fields computed by a getter
      function provided by a plugin.

    Access to all three field types is uniform: ``obj.field`` works the
    same regardless of whether ``field`` is fixed, flexible, or
    computed.

    Model objects can optionally be associated with a `Library` object,
    in which case they can be loaded and stored from the database. Dirty
    flags are used to track which fields need to be stored.
    """

    # Abstract components (to be provided by subclasses).

    _table = None
    """The main SQLite table name.
    """

    _flex_table = None
    """The flex field SQLite table name.
    """

    _fields = {}
    """A mapping indicating available "fixed" fields on this type. The
    keys are field names and the values are `Type` objects.
    """

    _search_fields = ()
    """The fields that should be queried by default by unqualified query
    terms.
    """

    _types = {}
    """Optional Types for non-fixed (i.e., flexible and computed) fields.
    """

    _sorts = {}
    """Optional named sort criteria. The keys are strings and the values
    are subclasses of `Sort`.
    """

    _always_dirty = False
    """By default, fields only become "dirty" when their value actually
    changes. Enabling this flag marks fields as dirty even when the new
    value is the same as the old value (e.g., `o.f = o.f`).
    """

    @classmethod
    def _getters(cls):
        """Return a mapping from field names to getter functions.
        """
        # We could cache this if it becomes a performance problem to
        # gather the getter mapping every time.
        raise NotImplementedError()

    def _template_funcs(self):
        """Return a mapping from function names to text-transformer
        functions.
        """
        # As above: we could consider caching this result.
        raise NotImplementedError()

    # Basic operation.

    def __init__(self, db=None, **values):
        """Create a new object with an optional Database association and
        initial field values.
        """
        self._db = db
        self._dirty = set()
        self._values_fixed = {}
        self._values_flex = {}

        # Initial contents.
        self.update(values)
        self.clear_dirty()

    @classmethod
    def _awaken(cls, db=None, fixed_values=None, flex_values=None):
        """Create an object with values drawn from the database.

        This is a performance optimization: the checks involved with
        ordinary construction are bypassed. `fixed_values` and
        `flex_values` are mappings from field names to raw SQL values;
        omitting either (or passing None) means "no values".
        """
        obj = cls(db)
        # `None` sentinels replace the former mutable `{}` defaults.
        for key, value in (fixed_values or {}).items():
            obj._values_fixed[key] = cls._type(key).from_sql(value)
        for key, value in (flex_values or {}).items():
            obj._values_flex[key] = cls._type(key).from_sql(value)
        return obj

    def __repr__(self):
        """Show the class name followed by every non-computed field as
        ``key=value`` pairs.
        """
        return '{0}({1})'.format(
            type(self).__name__,
            ', '.join('{0}={1!r}'.format(k, v) for k, v in dict(self).items()),
        )

    def clear_dirty(self):
        """Mark all fields as *clean* (i.e., not needing to be stored to
        the database).
        """
        self._dirty = set()

    def _check_db(self, need_id=True):
        """Ensure that this object is associated with a database row: it
        has a reference to a database (`_db`) and an id. A ValueError
        exception is raised otherwise.
        """
        if not self._db:
            raise ValueError(
                u'{0} has no database'.format(type(self).__name__)
            )
        if need_id and not self.id:
            raise ValueError(u'{0} has no id'.format(type(self).__name__))

    def copy(self):
        """Create a copy of the model object.

        The field values and other state is duplicated, but the new copy
        remains associated with the same database as the old object.
        (A simple `copy.deepcopy` will not work because it would try to
        duplicate the SQLite connection.)
        """
        new = self.__class__()
        new._db = self._db
        new._values_fixed = self._values_fixed.copy()
        new._values_flex = self._values_flex.copy()
        new._dirty = self._dirty.copy()
        return new

    # Essential field accessors.

    @classmethod
    def _type(cls, key):
        """Get the type of a field, a `Type` instance.

        If the field has no explicit type, it is given the base `Type`,
        which does no conversion.
        """
        return cls._fields.get(key) or cls._types.get(key) or types.DEFAULT

    def __getitem__(self, key):
        """Get the value for a field. Raise a KeyError if the field is
        not available.
        """
        getters = self._getters()
        if key in getters:  # Computed.
            return getters[key](self)
        elif key in self._fields:  # Fixed.
            # Unset fixed fields fall back to the type's null value.
            return self._values_fixed.get(key, self._type(key).null)
        elif key in self._values_flex:  # Flexible.
            return self._values_flex[key]
        else:
            raise KeyError(key)

    def _setitem(self, key, value):
        """Assign the value for a field, return whether new and old value
        differ.
        """
        # Choose where to place the value.
        if key in self._fields:
            source = self._values_fixed
        else:
            source = self._values_flex

        # If the field has a type, filter the value.
        value = self._type(key).normalize(value)

        # Assign value and possibly mark as dirty.
        old_value = source.get(key)
        source[key] = value
        changed = old_value != value
        if self._always_dirty or changed:
            self._dirty.add(key)

        return changed

    def __setitem__(self, key, value):
        """Assign the value for a field.
        """
        self._setitem(key, value)

    def __delitem__(self, key):
        """Remove a flexible attribute from the model.

        Computed and fixed fields cannot be deleted; attempting to do so,
        or deleting an unknown field, raises a KeyError.
        """
        if key in self._values_flex:  # Flexible.
            del self._values_flex[key]
            self._dirty.add(key)  # Mark for dropping on store.
        elif key in self._getters():  # Computed.
            raise KeyError(u'computed field {0} cannot be deleted'.format(key))
        elif key in self._fields:  # Fixed.
            raise KeyError(u'fixed field {0} cannot be deleted'.format(key))
        else:
            raise KeyError(u'no such field {0}'.format(key))

    def keys(self, computed=False):
        """Get a list of available field names for this object. The
        `computed` parameter controls whether computed (plugin-provided)
        fields are included in the key list.
        """
        base_keys = list(self._fields) + list(self._values_flex.keys())
        if computed:
            return base_keys + list(self._getters().keys())
        else:
            return base_keys

    @classmethod
    def all_keys(cls):
        """Get a list of available keys for objects of this type.
        Includes fixed and computed fields.
        """
        return list(cls._fields) + list(cls._getters().keys())

    # Act like a dictionary.

    def update(self, values):
        """Assign all values in the given dict.
        """
        for key, value in values.items():
            self[key] = value

    def items(self):
        """Iterate over (key, value) pairs that this object contains.
        Computed fields are not included.
        """
        for key in self:
            yield key, self[key]

    def get(self, key, default=None):
        """Get the value for a given key or `default` if it does not
        exist.
        """
        if key in self:
            return self[key]
        else:
            return default

    def __contains__(self, key):
        """Determine whether `key` is an attribute on this object.
        """
        return key in self.keys(True)

    def __iter__(self):
        """Iterate over the available field names (excluding computed
        fields).
        """
        return iter(self.keys())

    # Convenient attribute access.

    def __getattr__(self, key):
        """Expose fields as attributes. Names starting with an underscore
        are never treated as fields; unknown fields raise AttributeError.
        """
        if key.startswith('_'):
            raise AttributeError(u'model has no attribute {0!r}'.format(key))
        else:
            try:
                return self[key]
            except KeyError:
                raise AttributeError(u'no such field {0!r}'.format(key))

    def __setattr__(self, key, value):
        """Assign ordinary attributes for underscore-prefixed names and
        field values for everything else.
        """
        if key.startswith('_'):
            super(Model, self).__setattr__(key, value)
        else:
            self[key] = value

    def __delattr__(self, key):
        """Delete ordinary attributes for underscore-prefixed names and
        fields for everything else.
        """
        if key.startswith('_'):
            super(Model, self).__delattr__(key)
        else:
            del self[key]

    # Database interaction (CRUD methods).

    def store(self, fields=None):
        """Save the object's metadata into the library database.

        :param fields: the fields to be stored. If not specified, all fields
            will be.
        """
        if fields is None:
            fields = self._fields
        self._check_db()

        # Build assignments for query.
        assignments = []
        subvars = []
        for key in fields:
            if key != 'id' and key in self._dirty:
                self._dirty.remove(key)
                assignments.append(key + '=?')
                value = self._type(key).to_sql(self[key])
                subvars.append(value)
        assignments = ','.join(assignments)

        with self._db.transaction() as tx:
            # Main table update.
            if assignments:
                query = 'UPDATE {0} SET {1} WHERE id=?'.format(
                    self._table, assignments
                )
                subvars.append(self.id)
                tx.mutate(query, subvars)

            # Modified/added flexible attributes.
            for key, value in self._values_flex.items():
                if key in self._dirty:
                    self._dirty.remove(key)
                    tx.mutate(
                        'INSERT INTO {0} '
                        '(entity_id, key, value) '
                        'VALUES (?, ?, ?);'.format(self._flex_table),
                        (self.id, key, value),
                    )

            # Deleted flexible attributes: whatever is still dirty at
            # this point is treated as removed.
            for key in self._dirty:
                tx.mutate(
                    'DELETE FROM {0} '
                    'WHERE entity_id=? AND key=?'.format(self._flex_table),
                    (self.id, key)
                )

        self.clear_dirty()

    def load(self):
        """Refresh the object's metadata from the library database.
        """
        self._check_db()
        stored_obj = self._db._get(type(self), self.id)
        assert stored_obj is not None, u"object {0} not in DB".format(self.id)
        self._values_fixed = {}
        self._values_flex = {}
        self.update(dict(stored_obj))
        self.clear_dirty()

    def remove(self):
        """Remove the object's associated rows from the database.
        """
        self._check_db()
        with self._db.transaction() as tx:
            tx.mutate(
                'DELETE FROM {0} WHERE id=?'.format(self._table),
                (self.id,)
            )
            tx.mutate(
                'DELETE FROM {0} WHERE entity_id=?'.format(self._flex_table),
                (self.id,)
            )

    def add(self, db=None):
        """Add the object to the library database. This object must be
        associated with a database; you can provide one via the `db`
        parameter or use the currently associated database.

        The object's `id` and `added` fields are set along with any
        current field values.
        """
        if db:
            self._db = db
        self._check_db(False)

        with self._db.transaction() as tx:
            new_id = tx.mutate(
                'INSERT INTO {0} DEFAULT VALUES'.format(self._table)
            )
            self.id = new_id
            self.added = time.time()

            # Mark every non-null field as dirty and store.
            for key in self:
                if self[key] is not None:
                    self._dirty.add(key)
            self.store()

    # Formatting and templating.

    _formatter = FormattedMapping

    def formatted(self, for_path=False):
        """Get a mapping containing all values on this object formatted
        as human-readable unicode strings.
        """
        return self._formatter(self, for_path)

    def evaluate_template(self, template, for_path=False):
        """Evaluate a template (a string or a `Template` object) using
        the object's fields. If `for_path` is true, then no new path
        separators will be added to the template.
        """
        # Perform substitution.
        if isinstance(template, six.string_types):
            template = Template(template)
        return template.substitute(self.formatted(for_path),
                                   self._template_funcs())

    # Parsing.

    @classmethod
    def _parse(cls, key, string):
        """Parse a string as a value for the given key.

        Raise a TypeError if `string` is not a string.
        """
        if not isinstance(string, six.string_types):
            raise TypeError(u"_parse() argument must be a string")

        return cls._type(key).parse(string)

    def set_parse(self, key, string):
        """Set the object's key to a value represented by a string.
        """
        self[key] = self._parse(key, string)
# Database controller and supporting interfaces.
class Results(object):
    """An item query result set. Iterating over the collection lazily
    constructs LibModel objects that reflect database rows.
    """

    def __init__(self, model_class, rows, db, query=None, sort=None):
        """Create a result set that will construct objects of type
        `model_class`.

        `model_class` is a subclass of `LibModel` that will be
        constructed. `rows` is a query result: a list of mappings. The
        new objects will be associated with the database `db`.

        If `query` is provided, it is used as a predicate to filter the
        results for a "slow query" that cannot be evaluated by the
        database directly. If `sort` is provided, it is used to sort the
        full list of results before returning. This means it is a "slow
        sort" and all objects must be built before returning the first
        one.
        """
        self.model_class = model_class
        self.rows = rows
        self.db = db
        self.query = query
        self.sort = sort

        # We keep a queue of rows we haven't yet consumed for
        # materialization. We preserve the original total number of
        # rows.
        self._rows = rows
        self._row_count = len(rows)

        # The materialized objects corresponding to rows that have been
        # consumed.
        self._objects = []

    def _get_objects(self):
        """Construct and generate Model objects for the query. The
        objects are returned in the order emitted from the database; no
        slow sort is applied.

        For performance, this generator caches materialized objects to
        avoid constructing them more than once. This way, iterating over
        a `Results` object a second time should be much faster than the
        first.
        """
        index = 0  # Position in the materialized objects.
        while index < len(self._objects) or self._rows:
            # Are there previously-materialized objects to produce?
            if index < len(self._objects):
                yield self._objects[index]
                index += 1

            # Otherwise, we consume another row, materialize its object
            # and produce it.
            else:
                while self._rows:
                    row = self._rows.pop(0)
                    obj = self._make_model(row)
                    # If there is a slow-query predicate, ensure that the
                    # object passes it.
                    if not self.query or self.query.match(obj):
                        self._objects.append(obj)
                        index += 1
                        yield obj
                        break

    def __iter__(self):
        """Construct and generate Model objects for all matching
        objects, in sorted order.
        """
        if self.sort:
            # Slow sort. Must build the full list first.
            objects = self.sort.sort(list(self._get_objects()))
            return iter(objects)

        else:
            # Objects are pre-sorted (i.e., by the database).
            return self._get_objects()

    def _make_model(self, row):
        """Build a model object from one main-table `row`, fetching its
        flexible attributes from the model's flex table.
        """
        # Get the flexible attributes for the object.
        with self.db.transaction() as tx:
            flex_rows = tx.query(
                'SELECT * FROM {0} WHERE entity_id=?'.format(
                    self.model_class._flex_table
                ),
                (row['id'],)
            )

        cols = dict(row)
        values = dict((k, v) for (k, v) in cols.items()
                      if not k[:4] == 'flex')
        flex_values = dict((row['key'], row['value']) for row in flex_rows)

        # Construct the Python object
        obj = self.model_class._awaken(self.db, values, flex_values)
        return obj

    def __len__(self):
        """Get the number of matching objects.
        """
        if not self._rows:
            # Fully materialized. Just count the objects.
            return len(self._objects)

        elif self.query:
            # A slow query. Fall back to testing every object.
            return sum(1 for _ in self)

        else:
            # A fast query. Just count the rows.
            return self._row_count

    def __nonzero__(self):
        """Does this result contain any objects?
        """
        return self.__bool__()

    def __bool__(self):
        """Does this result contain any objects?
        """
        return bool(len(self))

    def __getitem__(self, n):
        """Get the nth item in this result set. This is inefficient: all
        items up to n are materialized and thrown away.

        Negative indices count from the end and require materializing
        the whole result set. An IndexError is raised when `n` is out of
        range.
        """
        if not self._rows and not self.sort:
            # Fully materialized and already in order. Just look up the
            # object.
            return self._objects[n]

        if n < 0:
            # Skipping `range(n)` items would skip nothing for a negative
            # `n` and wrongly return the first item, so resolve negative
            # indices against the complete list instead. `iter(self)` is
            # passed explicitly so `list` does not consult `__len__`.
            objects = list(iter(self))
            try:
                return objects[n]
            except IndexError:
                raise IndexError(
                    u'result index {0} out of range'.format(n)
                )

        it = iter(self)
        try:
            for i in range(n):
                next(it)
            return next(it)
        except StopIteration:
            raise IndexError(u'result index {0} out of range'.format(n))

    def get(self):
        """Return the first matching object, or None if no objects
        match.
        """
        it = iter(self)
        try:
            return next(it)
        except StopIteration:
            return None
class Transaction(object):
    """A context manager for safe, concurrent access to the database.
    All SQL commands should be executed through a transaction.
    """

    def __init__(self, db):
        self.db = db

    def __enter__(self):
        """Begin a transaction. This transaction may be created while
        another is active in a different thread.
        """
        with self.db._tx_stack() as stack:
            first = not stack
            stack.append(self)
        if first:
            # Beginning a "root" transaction, which corresponds to an
            # SQLite transaction.
            self.db._db_lock.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Complete a transaction. This must be the most recently
        entered but not yet exited transaction. If it is the last active
        transaction, the database updates are committed.
        """
        with self.db._tx_stack() as stack:
            # Pop outside the `assert` so the stack is still unwound when
            # assertions are stripped (`python -O`).
            popped = stack.pop()
            assert popped is self
            empty = not stack
        if empty:
            # Ending a "root" transaction. End the SQLite transaction.
            try:
                self.db._connection().commit()
            finally:
                # Always release the lock, even if the commit fails;
                # otherwise no later transaction could ever begin.
                self.db._db_lock.release()

    def query(self, statement, subvals=()):
        """Execute an SQL statement with substitution values and return
        a list of rows from the database.
        """
        cursor = self.db._connection().execute(statement, subvals)
        return cursor.fetchall()

    def mutate(self, statement, subvals=()):
        """Execute an SQL statement with substitution values and return
        the row ID of the last affected row.

        Raise DBAccessError when SQLite reports that the database file
        is read-only or cannot be opened; other operational errors are
        re-raised unchanged.
        """
        try:
            cursor = self.db._connection().execute(statement, subvals)
            return cursor.lastrowid
        except sqlite3.OperationalError as e:
            # In two specific cases, SQLite reports an error while accessing
            # the underlying database file. We surface these exceptions as
            # DBAccessError so the application can abort.
            if e.args[0] in ("attempt to write a readonly database",
                             "unable to open database file"):
                raise DBAccessError(e.args[0])
            else:
                raise

    def script(self, statements):
        """Execute a string containing multiple SQL statements."""
        self.db._connection().executescript(statements)
class Database(object):
    """A collection of Model objects backed by an SQLite database.
    """

    _models = ()
    """The Model subclasses representing tables in this database.
    """

    def __init__(self, path, timeout=5.0):
        """Remember the database location and timeout, then make sure
        every model in `_models` has its main and flexible-attribute
        tables.
        """
        self.path = path
        self.timeout = timeout

        # Both maps are keyed by thread ID and hold per-thread private
        # resources.
        self._connections = {}
        self._tx_stacks = defaultdict(list)

        # Guards the two per-thread maps above.
        self._shared_map_lock = threading.Lock()

        # Guards the database itself. SQLite does permit concurrent
        # access from several threads, but many users saw crashes tied to
        # it: when SQLite is built without HAVE_USLEEP, its contention
        # backoff sleeps for whole seconds (!) and trips its own internal
        # timeout. Holding this lock keeps at most one SQLite transaction
        # active at any moment.
        self._db_lock = threading.Lock()

        # Bring the schema up to date.
        for model_cls in self._models:
            self._make_table(model_cls._table, model_cls._fields)
            self._make_attribute_table(model_cls._flex_table)

    # Primitive access control: connections and transactions.

    def _connection(self):
        """Return the calling thread's SQLite connection, creating it on
        first use. Each thread gets its own connection object.
        """
        thread_id = threading.current_thread().ident
        with self._shared_map_lock:
            conn = self._connections.get(thread_id)
            if conn is None:
                conn = self._create_connection()
                self._connections[thread_id] = conn
            return conn

    def _create_connection(self):
        """Open and return a brand-new SQLite connection.

        A fresh connection is made on every call. Override this method
        to customize connection settings (for example, to register
        custom functions).
        """
        # On Python 3 the `sqlite3` module cannot take a bytestring path,
        # so `py3_path` is used to supply a `str`.
        db_path = py3_path(self.path)
        conn = sqlite3.connect(db_path, timeout=self.timeout)

        # Let SELECT results be accessed like dictionaries.
        conn.row_factory = sqlite3.Row
        return conn

    def _close(self):
        """Drop every thread's connection from the connection map.

        The database object stays usable afterwards: new connections are
        opened again on demand.
        """
        with self._shared_map_lock:
            self._connections.clear()

    @contextlib.contextmanager
    def _tx_stack(self):
        """Context manager yielding the calling thread's transaction
        stack while holding the lock that protects the stack map.
        Transactions should never migrate across threads.
        """
        thread_id = threading.current_thread().ident
        with self._shared_map_lock:
            yield self._tx_stacks[thread_id]

    def transaction(self):
        """Return a new :class:`Transaction` for talking directly to the
        underlying SQLite database.
        """
        return Transaction(self)

    # Schema setup and migration.

    def _make_table(self, table, fields):
        """Create `table`, or add its missing columns, so that it has a
        column for every entry of `fields` (a mapping from field names to
        `Type`s).
        """
        # Inspect the current schema.
        with self.transaction() as tx:
            info_rows = tx.query('PRAGMA table_info(%s)' % table)
        existing = set(row[1] for row in info_rows)

        if existing.issuperset(set(fields.keys())):
            # Every required column is already present.
            return

        if existing:
            # The table is there but lacks some columns: add them.
            setup_sql = ''.join(
                'ALTER TABLE {0} ADD COLUMN {1} {2};\n'.format(
                    table, name, typ.sql
                )
                for name, typ in fields.items()
                if name not in existing
            )
        else:
            # There is no table yet: create it from scratch.
            column_defs = ', '.join(
                '{0} {1}'.format(name, typ.sql)
                for name, typ in fields.items()
            )
            setup_sql = 'CREATE TABLE {0} ({1});\n'.format(table,
                                                           column_defs)

        with self.transaction() as tx:
            tx.script(setup_sql)

    def _make_attribute_table(self, flex_table):
        """Create the flexible-attribute table `flex_table` and its
        entity index unless they already exist.
        """
        with self.transaction() as tx:
            tx.script("""
CREATE TABLE IF NOT EXISTS {0} (
id INTEGER PRIMARY KEY,
entity_id INTEGER,
key TEXT,
value TEXT,
UNIQUE(entity_id, key) ON CONFLICT REPLACE);
CREATE INDEX IF NOT EXISTS {0}_by_entity
ON {0} (entity_id);
""".format(flex_table))

    # Querying.

    def _fetch(self, model_cls, query=None, sort=None):
        """Return a `Results` set of `model_cls` objects matching
        `query`, ordered by `sort`.

        A missing query matches everything and a missing sort leaves the
        order unspecified. Whatever part of the query or sort cannot be
        expressed in SQL is handed to `Results` to be applied in Python.
        """
        query = query or TrueQuery()  # A null query.
        sort = sort or NullSort()  # Unsorted.

        where, subvals = query.clause()
        order_by = sort.order_clause()
        order_sql = "ORDER BY {0}".format(order_by) if order_by else ''

        sql = ("SELECT * FROM {0} WHERE {1} {2}").format(
            model_cls._table,
            where or '1',
            order_sql,
        )

        with self.transaction() as tx:
            rows = tx.query(sql, subvals)

        slow_query = None if where else query
        slow_sort = sort if sort.is_slow() else None
        return Results(model_cls, rows, self, slow_query, slow_sort)

    def _get(self, model_cls, id):
        """Return the `model_cls` object with the given id, or None when
        no such row exists.
        """
        matches = self._fetch(model_cls, MatchQuery('id', id))
        return matches.get()

View file

@ -0,0 +1,944 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""The Query type hierarchy for DBCore.
"""
from __future__ import division, absolute_import, print_function
import re
from operator import mul
from beets import util
from datetime import datetime, timedelta
import unicodedata
from functools import reduce
import six
# Python 3 has no `buffer` builtin, so alias it to `memoryview` there.
# Python 2 keeps its builtin because sqlite won't accept memoryview in
# python 2.
if not six.PY2:
    buffer = memoryview
class ParsingError(ValueError):
    """Base class for errors raised when a user-requested album or query
    specification cannot be parsed.
    """
    pass
class InvalidQueryError(ParsingError):
    """Signals an invalid query of any kind.

    `query` should be a unicode string or a list; a list is joined with
    spaces before being placed in the error message.
    """

    def __init__(self, query, explanation):
        query_text = " ".join(query) if isinstance(query, list) else query
        super(InvalidQueryError, self).__init__(
            u"'{0}': {1}".format(query_text, explanation)
        )
class InvalidQueryArgumentValueError(ParsingError):
    """Signals a query argument that could not be converted as expected.

    It is meant to be caught higher up the stack, where a more meaningful
    InvalidQueryError (one that includes the query) can be raised.
    """

    def __init__(self, what, expected, detail=None):
        summary = u"'{0}' is not {1}".format(what, expected)
        # Append the optional detail only when one was supplied.
        message = u"{0}: {1}".format(summary, detail) if detail else summary
        super(InvalidQueryArgumentValueError, self).__init__(message)
class Query(object):
    """Abstract base class for queries into the item database.
    """

    def clause(self):
        """Produce an SQLite expression that implements the query.

        The result is a `(clause, subvals)` pair: `clause` is a valid
        SQLite WHERE clause and `subvals` holds the values substituted
        for the `?` placeholders in it. This base implementation supplies
        no clause.
        """
        no_clause, no_subvals = None, ()
        return no_clause, no_subvals

    def match(self, item):
        """Tell whether the query matches the given Item. This allows
        queries to be run over arbitrary sets of Items. Subclasses must
        implement it.
        """
        raise NotImplementedError

    def __repr__(self):
        template = "{0.__class__.__name__}()"
        return template.format(self)

    def __eq__(self, other):
        # Queries without state are equal exactly when their types match.
        own_type, other_type = type(self), type(other)
        return own_type == other_type

    def __hash__(self):
        return 0
class FieldQuery(Query):
    """Abstract query that looks for a pattern in one specific field.

    Subclasses must supply a `value_match` class method deciding whether
    a pattern matches a value. They may also supply `col_clause` to
    express the same matching in SQLite.
    """

    def __init__(self, field, pattern, fast=True):
        self.field = field
        self.pattern = pattern
        self.fast = fast

    def col_clause(self):
        """Return the `(clause, subvals)` pair for a fast query; the base
        implementation supplies no clause.
        """
        return None, ()

    def clause(self):
        """Return `col_clause()` for fast queries and no clause at all
        for slow ones.
        """
        if not self.fast:
            # Matching a flexattr. This is a slow query.
            return None, ()
        return self.col_clause()

    @classmethod
    def value_match(cls, pattern, value):
        """Decide whether `value` matches `pattern`. Both arguments are
        strings. Subclasses must implement this.
        """
        raise NotImplementedError()

    def match(self, item):
        """Match the item's value for this query's field against the
        pattern.
        """
        field_value = item.get(self.field)
        return self.value_match(self.pattern, field_value)

    def __repr__(self):
        template = ("{0.__class__.__name__}({0.field!r}, {0.pattern!r}, "
                    "{0.fast})")
        return template.format(self)

    def __eq__(self, other):
        if not super(FieldQuery, self).__eq__(other):
            return False
        return self.field == other.field and self.pattern == other.pattern

    def __hash__(self):
        pattern_hash = hash(self.pattern)
        return hash((self.field, pattern_hash))
class MatchQuery(FieldQuery):
    """Query for exact matches in an item field."""

    def col_clause(self):
        """Return an SQL equality test on the field, with the pattern as
        its single substitution value.
        """
        clause = self.field + " = ?"
        subvals = [self.pattern]
        return clause, subvals

    @classmethod
    def value_match(cls, pattern, value):
        """Report whether `pattern` equals `value`."""
        return pattern == value
class NoneQuery(FieldQuery):
    """A query that checks whether a field is null."""

    def __init__(self, field, fast=True):
        super(NoneQuery, self).__init__(field, None, fast)

    def col_clause(self):
        """Return an SQL ``IS NULL`` test on the field, with no
        substitution values.
        """
        return self.field + " IS NULL", ()

    def match(self, item):
        """Return True if the item's value for the field is None or the
        item has no such field at all.
        """
        # This must be an instance method: `field` is set per instance in
        # `__init__`, so it cannot be read from the class.
        try:
            return item[self.field] is None
        except KeyError:
            return True

    def __repr__(self):
        return "{0.__class__.__name__}({0.field!r}, {0.fast})".format(self)
class StringFieldQuery(FieldQuery):
    """FieldQuery variant that turns values into strings before they are
    matched.
    """

    @classmethod
    def value_match(cls, pattern, value):
        """Decide whether `value`, which may be of any type, matches
        `pattern`. The value is converted with `util.as_string` and then
        passed to `string_match`.
        """
        text = util.as_string(value)
        return cls.string_match(pattern, text)

    @classmethod
    def string_match(cls, pattern, value):
        """Decide whether the string `value` matches the string
        `pattern`. Subclasses must implement this.
        """
        raise NotImplementedError()
class SubstringQuery(StringFieldQuery):
    """Query matching a substring within a specific item field."""

    def col_clause(self):
        """Return a ``LIKE`` clause that searches for the pattern
        anywhere in the field, treating the pattern's own wildcard
        characters literally.
        """
        # Escape the escape character first, then the LIKE wildcards.
        escaped = self.pattern
        for raw, replacement in (('\\', '\\\\'),
                                 ('%', '\\%'),
                                 ('_', '\\_')):
            escaped = escaped.replace(raw, replacement)

        return self.field + " like ? escape '\\'", ['%' + escaped + '%']

    @classmethod
    def string_match(cls, pattern, value):
        """Case-insensitively test whether `pattern` occurs in `value`."""
        needle = pattern.lower()
        haystack = value.lower()
        return needle in haystack
class RegexpQuery(StringFieldQuery):
    """A query that matches a regular expression in a specific item
    field.

    Raises InvalidQueryArgumentValueError when the pattern is not a valid
    regular expression.
    """

    def __init__(self, field, pattern, fast=True):
        super(RegexpQuery, self).__init__(field, pattern, fast)
        pattern = self._normalize(pattern)
        try:
            # Compile the *normalized* pattern: matched values are
            # NFC-normalized in `string_match`, so the pattern has to be
            # in the same form for equivalent text to match.
            self.pattern = re.compile(pattern)
        except re.error as exc:
            # Invalid regular expression.
            raise InvalidQueryArgumentValueError(pattern,
                                                 u"a regular expression",
                                                 format(exc))

    @staticmethod
    def _normalize(s):
        """Normalize a Unicode string's representation (used on both
        patterns and matched values).
        """
        return unicodedata.normalize('NFC', s)

    @classmethod
    def string_match(cls, pattern, value):
        """Return whether the compiled `pattern` is found anywhere in the
        NFC-normalized `value`.
        """
        return pattern.search(cls._normalize(value)) is not None
class BooleanQuery(MatchQuery):
    """Matches a boolean field. The pattern is either a boolean or a
    string that spells one.
    """

    def __init__(self, field, pattern, fast=True):
        """Store the pattern as an integer, parsing it with
        `util.str2bool` first when it is given as a string.
        """
        super(BooleanQuery, self).__init__(field, pattern, fast)
        flag = self.pattern
        if isinstance(pattern, six.string_types):
            flag = util.str2bool(pattern)
        self.pattern = int(flag)
class BytesQuery(MatchQuery):
    """Match a raw bytes field (i.e., a path). This is a necessary hack
    to work around the `sqlite3` module's desire to treat `bytes` and
    `unicode` equivalently in Python 2. Always use this query instead of
    `MatchQuery` when matching on BLOB values.
    """

    def __init__(self, field, pattern):
        """Keep two views of the pattern: `self.pattern` as bytes and
        `self.buf_pattern` as a buffer/memoryview for SQLite.
        """
        super(BytesQuery, self).__init__(field, pattern)

        # Handing SQLite a buffer/memoryview makes it compare the value
        # as a binary blob rather than as encoded Unicode.
        value = self.pattern
        if isinstance(value, six.text_type):
            encoded = value.encode('utf-8')
            self.pattern = encoded
            self.buf_pattern = buffer(encoded)
        elif isinstance(value, bytes):
            self.buf_pattern = buffer(value)
        elif isinstance(value, buffer):
            self.buf_pattern = value
            self.pattern = bytes(value)

    def col_clause(self):
        """Return an equality clause bound to the buffer pattern."""
        return self.field + " = ?", [self.buf_pattern]
class NumericQuery(FieldQuery):
    """Matches numeric fields. Ruby-style range ellipses (``..``) give
    one- or two-sided ranges: ``year:2001..`` finds music released since
    the turn of the century.

    Raises InvalidQueryError when the pattern does not represent an int
    or a float.
    """

    def _convert(self, s):
        """Turn the string `s` into an int, or a float if it is not an
        int.

        Return None if `s` is empty.
        Raise an InvalidQueryError if the string cannot be converted.
        """
        if not s:
            return None
        # Prefer int so that whole numbers stay integers.
        for cast in (int, float):
            try:
                return cast(s)
            except ValueError:
                continue
        raise InvalidQueryArgumentValueError(s, u"an int or a float")

    def __init__(self, field, pattern, fast=True):
        """Parse `pattern` into either a single point or a range with
        optional lower and upper bounds.
        """
        super(NumericQuery, self).__init__(field, pattern, fast)

        low, ellipsis, high = pattern.partition('..')
        if ellipsis:
            # One- or two-sided range.
            self.point = None
            self.rangemin = self._convert(low)
            self.rangemax = self._convert(high)
        else:
            # No range.
            self.point = self._convert(low)
            self.rangemin = None
            self.rangemax = None

    def match(self, item):
        """Return whether the item's field equals the point or lies
        inside the (inclusive) range. Items lacking the field never
        match.
        """
        if self.field not in item:
            return False

        value = item[self.field]
        if isinstance(value, six.string_types):
            value = self._convert(value)

        if self.point is not None:
            return value == self.point

        if self.rangemin is not None and value < self.rangemin:
            return False
        return not (self.rangemax is not None and value > self.rangemax)

    def col_clause(self):
        """Return the SQL clause and values for the point or range; an
        unbounded range yields the always-true clause ``1``.
        """
        if self.point is not None:
            return self.field + '=?', (self.point,)

        conditions = []
        bounds = []
        if self.rangemin is not None:
            conditions.append(u'{0} >= ?'.format(self.field))
            bounds.append(self.rangemin)
        if self.rangemax is not None:
            conditions.append(u'{0} <= ?'.format(self.field))
            bounds.append(self.rangemax)

        if not conditions:
            return u'1', ()
        return u' AND '.join(conditions), tuple(bounds)
class CollectionQuery(Query):
    """An abstract query that aggregates other queries. It behaves like
    a sequence, so the sub-queries can be indexed and iterated.
    """

    def __init__(self, subqueries=()):
        self.subqueries = subqueries

    # Sequence protocol, delegated to the sub-query container.

    def __len__(self):
        return len(self.subqueries)

    def __getitem__(self, key):
        return self.subqueries[key]

    def __iter__(self):
        return iter(self.subqueries)

    def __contains__(self, item):
        return item in self.subqueries

    def clause_with_joiner(self, joiner):
        """Combine the clauses of all sub-queries, each wrapped in
        parentheses, using `joiner` surrounded by spaces.

        Returns ``(None, ())`` as soon as one sub-query has no clause.
        """
        wrapped = []
        values = []
        for subquery in self.subqueries:
            sub_clause, sub_values = subquery.clause()
            if not sub_clause:
                # Fall back to slow query.
                return None, ()
            wrapped.append('(' + sub_clause + ')')
            values.extend(sub_values)
        separator = ' ' + joiner + ' '
        return separator.join(wrapped), values

    def __repr__(self):
        return "{0.__class__.__name__}({0.subqueries!r})".format(self)

    def __eq__(self, other):
        if not super(CollectionQuery, self).__eq__(other):
            return False
        return self.subqueries == other.subqueries

    def __hash__(self):
        """Hash as the product of the sub-query hashes.

        The sub-queries are mutable, so strictly speaking this object
        should not be hashable; it is offered for convenience.
        """
        product = 1
        for subquery in self.subqueries:
            product = product * hash(subquery)
        return product
class AnyFieldQuery(CollectionQuery):
    """A query that succeeds when a given FieldQuery subclass matches in
    at least one of several fields. The per-field query class is passed
    to the constructor.
    """

    def __init__(self, pattern, fields, cls):
        """Build one `cls(field, pattern, True)` sub-query per entry of
        `fields`.
        """
        self.pattern = pattern
        self.fields = fields
        self.query_class = cls
        per_field = [cls(name, pattern, True) for name in self.fields]
        super(AnyFieldQuery, self).__init__(per_field)

    def clause(self):
        return self.clause_with_joiner('or')

    def match(self, item):
        return any(subquery.match(item) for subquery in self.subqueries)

    def __repr__(self):
        template = ("{0.__class__.__name__}({0.pattern!r}, {0.fields!r}, "
                    "{0.query_class.__name__})")
        return template.format(self)

    def __eq__(self, other):
        if not super(AnyFieldQuery, self).__eq__(other):
            return False
        return self.query_class == other.query_class

    def __hash__(self):
        identity = (self.pattern, tuple(self.fields), self.query_class)
        return hash(identity)
class MutableCollectionQuery(CollectionQuery):
    """A collection query that also supports item assignment and
    deletion, so its sub-queries can be changed after construction.
    """

    def __setitem__(self, key, value):
        """Replace the sub-query stored under `key`."""
        self.subqueries[key] = value

    def __delitem__(self, key):
        """Remove the sub-query stored under `key`."""
        del self.subqueries[key]
class AndQuery(MutableCollectionQuery):
    """A conjunction of a list of other queries."""

    def clause(self):
        """Return the sub-query clauses joined with ``and``."""
        return self.clause_with_joiner('and')

    def match(self, item):
        """Return True only if every sub-query matches `item`."""
        # A generator lets `all` stop at the first sub-query that fails
        # instead of evaluating all of them up front.
        return all(q.match(item) for q in self.subqueries)
class OrQuery(MutableCollectionQuery):
    """A disjunction of a list of other queries."""

    def clause(self):
        """Return the sub-query clauses joined with ``or``."""
        return self.clause_with_joiner('or')

    def match(self, item):
        """Return True if at least one sub-query matches `item`."""
        # A generator lets `any` stop at the first sub-query that
        # succeeds instead of evaluating all of them up front.
        return any(q.match(item) for q in self.subqueries)
class NotQuery(Query):
    """A query that matches the negation of its `subquery`, as a
    shortcut for performing `not(subquery)` without using regular
    expressions.
    """

    def __init__(self, subquery):
        self.subquery = subquery

    def clause(self):
        """Return the sub-query's clause wrapped in ``not (...)``.

        When the sub-query has no clause it is passed through
        unchanged: there is nothing to negate in SQL, and `match` does
        the work for slow queries.
        """
        inner, subvals = self.subquery.clause()
        if not inner:
            return inner, subvals
        return 'not ({0})'.format(inner), subvals

    def match(self, item):
        matched = self.subquery.match(item)
        return not matched

    def __repr__(self):
        return "{0.__class__.__name__}({0.subquery!r})".format(self)

    def __eq__(self, other):
        if not super(NotQuery, self).__eq__(other):
            return False
        return self.subquery == other.subquery

    def __hash__(self):
        inner_hash = hash(self.subquery)
        return hash(('not', inner_hash))
class TrueQuery(Query):
    """A query that matches every item."""

    def clause(self):
        """Return the constant-true SQL clause with no values."""
        return '1', ()

    def match(self, item):
        """Always return True, whatever `item` is."""
        return True
class FalseQuery(Query):
    """A query that matches no item."""

    def clause(self):
        """Return the constant-false SQL clause with no values."""
        return '0', ()

    def match(self, item):
        """Always return False, whatever `item` is."""
        return False
# Time/date queries.
def _to_epoch_time(date):
    """Return `date` (a `datetime` object) as a whole number of seconds
    since the (local) Unix epoch.
    """
    if not hasattr(date, 'timestamp'):
        # No `timestamp` method (it exists on Python 3.3+): measure the
        # distance from the local-time epoch by hand.
        elapsed = date - datetime.fromtimestamp(0)
        return int(elapsed.total_seconds())
    return int(date.timestamp())
def _parse_periods(pattern):
    """Parse `pattern`, one date or two dates separated by two dots
    (``..``), into a pair of `Period` objects.

    A pattern without ``..`` yields the same period as both start and
    end.
    """
    first, ellipsis, second = pattern.partition('..')
    if not ellipsis:
        instant = Period.parse(first)
        return (instant, instant)
    start = Period.parse(first)
    end = Period.parse(second)
    return (start, end)
class Period(object):
    """A period of time given by a date, time and precision.

    Example: 2014-01-01 10:50:30 with precision 'month' represents all
    instants of time during January 2014.
    """

    # Supported precisions, coarsest first. `date_formats` is indexed in
    # parallel: entry i holds the accepted formats for `precisions[i]`.
    precisions = ('year', 'month', 'day', 'hour', 'minute', 'second')
    date_formats = (
        ('%Y',),  # year
        ('%Y-%m',),  # month
        ('%Y-%m-%d',),  # day
        ('%Y-%m-%dT%H', '%Y-%m-%d %H'),  # hour
        ('%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M'),  # minute
        ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S')  # second
    )
    # Number of days represented by each relative-date unit letter.
    relative_units = {'y': 365, 'm': 30, 'w': 7, 'd': 1}
    # Inside a character class `|` is a literal character, not
    # alternation, so the classes list only the characters that are
    # really allowed. Otherwise "3|" would match and then fail the
    # `relative_units` lookup with a KeyError.
    relative_re = '(?P<sign>[+-]?)(?P<quantity>[0-9]+)' + \
        '(?P<timespan>[ymwd])'

    def __init__(self, date, precision):
        """Create a period with the given date (a `datetime` object) and
        precision (a string, one of "year", "month", "day", "hour",
        "minute", or "second").

        Raises ValueError for any other precision.
        """
        if precision not in Period.precisions:
            raise ValueError(u'Invalid precision {0}'.format(precision))
        self.date = date
        self.precision = precision

    @classmethod
    def parse(cls, string):
        """Parse a date and return a `Period` object or `None` if the
        string is empty, or raise an InvalidQueryArgumentValueError if
        the string cannot be parsed to a date.

        The date may be absolute or relative. Absolute dates look like
        `YYYY`, or `YYYY-MM-DD`, or `YYYY-MM-DD HH:MM:SS`, etc. Relative
        dates have three parts:

        - Optionally, a ``+`` or ``-`` sign indicating the future or the
          past. The default is the future.
        - A number: how much to add or subtract.
        - A letter indicating the unit: days, weeks, months or years
          (``d``, ``w``, ``m`` or ``y``). A "month" is exactly 30 days
          and a "year" is exactly 365 days.
        """
        if not string:
            return None

        # Check for a relative date.
        relative = re.match(cls.relative_re, string)
        if relative:
            sign = relative.group('sign')
            quantity = relative.group('quantity')
            timespan = relative.group('timespan')

            # Add or subtract the given amount of time from the current
            # date. Relative dates always get "second" precision.
            multiplier = -1 if sign == '-' else 1
            days = cls.relative_units[timespan]
            date = datetime.now() + \
                timedelta(days=int(quantity) * days) * multiplier
            return cls(date, cls.precisions[5])

        # Check for an absolute date. The position of the first format
        # that parses tells us the precision.
        for ordinal, format_options in enumerate(cls.date_formats):
            for format_option in format_options:
                try:
                    date = datetime.strptime(string, format_option)
                except ValueError:
                    # Parsing failed; try the next format.
                    continue
                return cls(date, cls.precisions[ordinal])

        raise InvalidQueryArgumentValueError(string,
                                             'a valid date/time string')

    def open_right_endpoint(self):
        """Based on the precision, convert the period to a precise
        `datetime` for use as a right endpoint in a right-open interval.

        Raises ValueError if the precision is not one of `precisions`.
        """
        precision = self.precision
        date = self.date
        if precision == 'year':
            return date.replace(year=date.year + 1, month=1)
        elif precision == 'month':
            if date.month < 12:
                return date.replace(month=date.month + 1)
            # December rolls over into January of the next year.
            return date.replace(year=date.year + 1, month=1)
        elif precision == 'day':
            return date + timedelta(days=1)
        elif precision == 'hour':
            return date + timedelta(hours=1)
        elif precision == 'minute':
            return date + timedelta(minutes=1)
        elif precision == 'second':
            return date + timedelta(seconds=1)
        else:
            raise ValueError(u'unhandled precision {0}'.format(precision))
class DateInterval(object):
    """A closed-open interval of dates.

    A left endpoint of None means since the beginning of time.
    A right endpoint of None means towards infinity.
    """

    def __init__(self, start, end):
        """Store the endpoints; when both are given, `start` must come
        strictly before `end` or ValueError is raised.
        """
        both_given = start is not None and end is not None
        if both_given and not start < end:
            message = u"start date {0} is not before end date {1}"
            raise ValueError(message.format(start, end))
        self.start = start
        self.end = end

    @classmethod
    def from_periods(cls, start, end):
        """Create an interval with two Periods as the endpoints.

        Either period may be None, leaving that side unbounded.
        """
        end_date = None
        if end is not None:
            end_date = end.open_right_endpoint()
        start_date = None
        if start is not None:
            start_date = start.date
        return cls(start_date, end_date)

    def contains(self, date):
        """Return whether `date` lies in ``[start, end)``."""
        before_start = self.start is not None and date < self.start
        if before_start:
            return False
        return not (self.end is not None and date >= self.end)

    def __str__(self):
        return '[{0}, {1})'.format(self.start, self.end)
class DateQuery(FieldQuery):
    """Matches date fields stored as seconds since Unix epoch time.

    Dates can be specified as ``year-month-day`` strings where only year
    is mandatory.

    The value of a date field can be matched against a date interval by
    using an ellipsis interval syntax similar to that of NumericQuery.
    """

    def __init__(self, field, pattern, fast=True):
        """Parse `pattern` into the `DateInterval` to match against."""
        super(DateQuery, self).__init__(field, pattern, fast)
        first, last = _parse_periods(pattern)
        self.interval = DateInterval.from_periods(first, last)

    def match(self, item):
        """Return whether the item's timestamp falls inside the
        interval. Items lacking the field never match.
        """
        if self.field not in item:
            return False
        seconds = float(item[self.field])
        moment = datetime.fromtimestamp(seconds)
        return self.interval.contains(moment)

    _clause_tmpl = "{0} {1} ?"

    def col_clause(self):
        """Return the SQL clause and values bounding the field by the
        interval; an unbounded interval yields the clause ``1``.
        """
        conditions = []
        subvals = []
        bounds = ((self.interval.start, ">="), (self.interval.end, "<"))
        for endpoint, operator in bounds:
            if endpoint:
                conditions.append(
                    self._clause_tmpl.format(self.field, operator))
                subvals.append(_to_epoch_time(endpoint))

        # With no endpoints at all, match any date.
        clause = ' AND '.join(conditions) if conditions else '1'
        return clause, subvals
class DurationQuery(NumericQuery):
    """NumericQuery that also accepts human-friendly (M:SS) time
    interval formats.

    Converts the range(s) to a float value, and delegates on
    NumericQuery.

    Raises InvalidQueryError when the pattern does not represent an int,
    float or M:SS time interval.
    """

    def _convert(self, s):
        """Convert a M:SS or numeric string to a float.

        Return None if `s` is empty.
        Raise an InvalidQueryError if the string cannot be converted.
        """
        if not s:
            return None
        # Try the M:SS reading first and plain float second.
        for parser in (util.raw_seconds_short, float):
            try:
                return parser(s)
            except ValueError:
                continue
        raise InvalidQueryArgumentValueError(
            s,
            u"a M:SS string or a float")
# Sorting.
class Sort(object):
    """Abstract base for a sort operation applied to the results of a
    query into the item database.
    """

    def order_clause(self):
        """Return the SQL fragment for an ORDER BY clause, or None when
        no fragment is used (i.e., this is a slow sort). The base class
        has none.
        """
        return None

    def sort(self, items):
        """Return a new, sorted list built from `items`."""
        ordered = sorted(items)
        return ordered

    def is_slow(self):
        """Tell whether this sort is *slow*, i.e. cannot be executed in
        SQL and must run in Python. The base class reports False.
        """
        return False

    def __hash__(self):
        return 0

    def __eq__(self, other):
        # Two sorts compare equal when they are of exactly the same type.
        return type(self) == type(other)
class MultipleSort(Sort):
    """A sort made of several sub-sorts."""

    def __init__(self, sorts=None):
        self.sorts = sorts or []

    def add_sort(self, sort):
        """Append `sort` to the list of sub-sorts."""
        self.sorts.append(sort)

    def _sql_sorts(self):
        """Return the list of sub-sorts for which we can be (at least
        partially) fast.

        A contiguous suffix of fast (SQL-capable) sub-sorts are
        executable in SQL. The remaining, even if they are fast
        independently, must be executed slowly.
        """
        fast_suffix = []
        for candidate in reversed(self.sorts):
            if candidate.order_clause() is None:
                break
            fast_suffix.append(candidate)
        # Collected back to front; restore the original order.
        fast_suffix.reverse()
        return fast_suffix

    def order_clause(self):
        """Return the comma-separated ORDER BY fragments of the
        SQL-capable sub-sorts.
        """
        fragments = [sort.order_clause() for sort in self._sql_sorts()]
        return ", ".join(fragments)

    def is_slow(self):
        """Return True if any sub-sort is slow."""
        return any(sort.is_slow() for sort in self.sorts)

    def sort(self, items):
        """Apply, in Python, every sub-sort that is not part of the
        SQL-capable suffix, going from the last of them to the first.
        """
        backwards = list(reversed(self.sorts))
        # Walking backwards, skip sub-sorts until the first one without
        # an SQL clause; it and everything after it must run here.
        first_slow = len(backwards)
        for position, candidate in enumerate(backwards):
            if candidate.order_clause() is None:
                first_slow = position
                break
        for slow_sort in backwards[first_slow:]:
            items = slow_sort.sort(items)
        return items

    def __repr__(self):
        return 'MultipleSort({!r})'.format(self.sorts)

    def __hash__(self):
        return hash(tuple(self.sorts))

    def __eq__(self, other):
        if not super(MultipleSort, self).__eq__(other):
            return False
        return self.sorts == other.sorts
class FieldSort(Sort):
    """An abstract sort criterion that orders by one field, whatever
    kind of field that is.
    """

    def __init__(self, field, ascending=True, case_insensitive=True):
        self.field = field
        self.ascending = ascending
        self.case_insensitive = case_insensitive

    def sort(self, objs):
        """Return `objs` sorted by the field; objects lacking it sort
        as the empty string.
        """
        # TODO: Conversion and null-detection here. In Python 3,
        # comparisons with None fail. We should also support flexible
        # attributes with different types without falling over.
        field_name = self.field
        fold_case = self.case_insensitive

        def sort_key(obj):
            value = obj.get(field_name, '')
            if fold_case and isinstance(value, six.text_type):
                return value.lower()
            return value

        descending = not self.ascending
        return sorted(objs, key=sort_key, reverse=descending)

    def __repr__(self):
        direction = '+' if self.ascending else '-'
        return '<{0}: {1}{2}>'.format(
            type(self).__name__,
            self.field,
            direction,
        )

    def __hash__(self):
        return hash((self.field, self.ascending))

    def __eq__(self, other):
        if not super(FieldSort, self).__eq__(other):
            return False
        return self.field == other.field and \
            self.ascending == other.ascending
class FixedFieldSort(FieldSort):
    """A sort on a fixed field, which can be expressed in SQL."""

    def order_clause(self):
        """Return the ``<expression> ASC|DESC`` fragment for this field.

        In case-insensitive mode, text and blob values are compared by
        their LOWER() form; other values are used as they are.
        """
        direction = "ASC" if self.ascending else "DESC"
        expression = self.field
        if self.case_insensitive:
            template = '(CASE ' \
                       'WHEN TYPEOF({0})="text" THEN LOWER({0}) ' \
                       'WHEN TYPEOF({0})="blob" THEN LOWER({0}) ' \
                       'ELSE {0} END)'
            expression = template.format(self.field)
        return "{0} {1}".format(expression, direction)
class SlowFieldSort(FieldSort):
    """A sort criterion on a model field that is not a fixed field,
    i.e. a computed or flexible one.
    """

    def is_slow(self):
        """Always True: this sort is flagged as slow."""
        return True
class NullSort(Sort):
    """No sorting. Leave results unsorted."""

    def sort(self, items):
        """Return `items` as given, without reordering."""
        return items

    def __bool__(self):
        # A null sort is falsy.
        return False

    def __nonzero__(self):
        # Python 2 spelling of `__bool__`.
        return self.__bool__()

    def __eq__(self, other):
        # Equal to any other NullSort and also to None.
        return type(self) == type(other) or other is None

    def __hash__(self):
        return 0

View file

@ -0,0 +1,250 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Parsing of strings into DBCore queries.
"""
from __future__ import division, absolute_import, print_function
import re
import itertools
from . import query
import beets
PARSE_QUERY_PART_REGEX = re.compile(
    # Group 1: optional negation prefix at the very start.
    r'(-|\^)?'
    # Optional, non-capturing "key:" segment. Group 2 is the field key;
    # the colon only counts as a separator when it is not preceded by a
    # backslash.
    r'(?:'
    r'(\S+?)'
    r'(?<!\\):'
    r')?'
    # Group 3: everything that is left is the term itself.
    r'(.*)',
    re.IGNORECASE,
)
def parse_query_part(part, query_classes={}, prefixes={},
                     default_class=query.SubstringQuery):
    """Parse a single *query part*, which is a chunk of a complete query
    string representing a single criterion.

    A query part is a string consisting of:
    - A *pattern*: the value to look for.
    - Optionally, a *field name* preceding the pattern, separated by a
      colon. So in `foo:bar`, `foo` is the field name and `bar` is the
      pattern.
    - Optionally, a *query prefix* just before the pattern (and after the
      optional colon) indicating the type of query that should be used. For
      example, in `~foo`, `~` might be a prefix. (The set of prefixes to
      look for is given in the `prefixes` parameter.)
    - Optionally, a negation indicator, `-` or `^`, at the very beginning.

    Both prefixes and the separating `:` character may be escaped with a
    backslash to avoid their normal meaning.

    The function returns a tuple consisting of:
    - The field name: a string or None if it's not present.
    - The pattern, a string.
    - The query class to use, which inherits from the base
      :class:`Query` type.
    - A negation flag, a bool.

    The three optional parameters determine which query class is used (i.e.,
    the third return value). They are:
    - `query_classes`, which maps field names to query classes. These
      are used when no explicit prefix is present.
    - `prefixes`, which maps prefix strings to query classes.
    - `default_class`, the fallback when neither the field nor a prefix
      indicates a query class.

    So the precedence for determining which query class to return is:
    prefix, followed by field, and finally the default.

    For example, assuming the `:` prefix is used for `RegexpQuery`:
    - `'stapler'` -> `(None, 'stapler', SubstringQuery, False)`
    - `'color:red'` -> `('color', 'red', SubstringQuery, False)`
    - `':^Quiet'` -> `(None, '^Quiet', RegexpQuery, False)`, because
      the `^` follows the `:`
    - `'color::b..e'` -> `('color', 'b..e', RegexpQuery, False)`
    - `'-color:red'` -> `('color', 'red', SubstringQuery, True)`
    """
    # NOTE: the dict defaults are only ever read here, never mutated, so
    # sharing them between calls is harmless.

    # Apply the regular expression and extract the components.
    part = part.strip()
    match = PARSE_QUERY_PART_REGEX.match(part)

    assert match  # Regex should always match
    negate = bool(match.group(1))
    key = match.group(2)
    # Un-escape colons. The backslash is written as `\\` because `\:` is
    # not a valid escape sequence in a normal string literal.
    term = match.group(3).replace('\\:', ':')

    # Check whether there's a prefix in the query and use the
    # corresponding query type.
    for pre, query_class in prefixes.items():
        if term.startswith(pre):
            return key, term[len(pre):], query_class, negate

    # No matching prefix, so use either the query class determined by
    # the field or the default as a fallback.
    query_class = query_classes.get(key, default_class)
    return key, term, query_class, negate
def construct_query_part(model_cls, prefixes, query_part):
    """Turn a *query part* string into a :class:`Query` object.

    :param model_cls: The :class:`Model` class that this is a query for.
      This is used to determine the appropriate query types for the
      model's fields.
    :param prefixes: A map from prefix strings to :class:`Query` types.
    :param query_part: The string to parse.

    See the documentation for `parse_query_part` for more information on
    query part syntax.
    """
    # An empty part matches everything.
    if not query_part:
        return query.TrueQuery()

    # Map every field name known to `model_cls` to the `Query` class of
    # its type. `_types` entries override `_fields` entries of the same
    # name because they come later in the chain.
    query_classes = {
        name: field_type.query
        for name, field_type in itertools.chain(model_cls._fields.items(),
                                                model_cls._types.items())
    }

    key, pattern, query_class, negate = \
        parse_query_part(query_part, query_classes, prefixes)

    if key is None:
        # No field name given: this is a "match anything" query.
        if issubclass(query_class, query.FieldQuery):
            # The query type works on one field, so wrap it in a query
            # that tries it on every search field.
            built = query.AnyFieldQuery(pattern, model_cls._search_fields,
                                        query_class)
        else:
            # Non-field query type.
            built = query_class(pattern)
    else:
        # A field name was given, so this must be a `FieldQuery`; build
        # it for the lower-cased field name.
        field = key.lower()
        built = query_class(field, pattern, field in model_cls._fields)

    return query.NotQuery(built) if negate else built
def query_from_strings(query_cls, model_cls, prefixes, query_parts):
    """Build a collection query of type `query_cls` out of a list of
    strings in the format understood by `parse_query_part`. `model_cls`
    determines how each string is turned into a query.
    """
    subqueries = [construct_query_part(model_cls, prefixes, part)
                  for part in query_parts]
    if not subqueries:
        # No terms in query: match everything.
        subqueries = [query.TrueQuery()]
    return query_cls(subqueries)
def construct_sort_part(model_cls, part):
    """Create a `Sort` from a single string criterion.

    `model_cls` is the `Model` being queried. `part` is a single string
    ending in ``+`` or ``-`` indicating the sort.
    """
    assert part, "part must be a field name and + or -"
    field, direction = part[:-1], part[-1]
    assert field, "field is missing"
    assert direction in ('+', '-'), "part must end with + or -"

    ascending = direction == '+'
    case_insensitive = beets.config['sort_case_insensitive'].get(bool)

    # A sort registered by the model for this name takes precedence.
    if field in model_cls._sorts:
        return model_cls._sorts[field](model_cls, ascending,
                                       case_insensitive)

    if field in model_cls._fields:
        sort_cls = query.FixedFieldSort
    else:
        # Flexible or computed.
        sort_cls = query.SlowFieldSort
    return sort_cls(field, ascending, case_insensitive)
def sort_from_strings(model_cls, sort_parts):
    """Create a `Sort` from a list of sort criteria (strings).

    No criteria give a `NullSort`, one criterion gives its sort
    directly, and several are combined in a `MultipleSort`.
    """
    if not sort_parts:
        return query.NullSort()
    if len(sort_parts) == 1:
        return construct_sort_part(model_cls, sort_parts[0])

    combined = query.MultipleSort()
    for criterion in sort_parts:
        combined.add_sort(construct_sort_part(model_cls, criterion))
    return combined
def parse_sorted_query(model_cls, parts, prefixes={}):
    """Given a list of strings, create the `Query` and `Sort` that they
    represent.
    """
    # The query is split into comma-separated groups. Each group becomes
    # an AndQuery, and the groups are joined together in one OrQuery.
    and_queries = []
    sort_terms = []
    pending = []

    # The extra trailing comma flushes the last group.
    for token in parts + [u',']:
        if not token.endswith(u','):
            # Sort parts (1) end in + or -, (2) don't have a field, and
            # (3) consist of more than just the + or -.
            is_sort = (token.endswith((u'+', u'-'))
                       and u':' not in token
                       and len(token) > 1)
            if is_sort:
                sort_terms.append(token)
            else:
                pending.append(token)
            continue

        # Ensure we can catch "foo, bar" as well as "foo , bar"
        before_comma = token[:-1]
        if before_comma:
            pending.append(before_comma)

        # Parse the group in to a single AndQuery
        # TODO: Avoid needlessly wrapping AndQueries containing 1 subquery?
        and_queries.append(query_from_strings(
            query.AndQuery, model_cls, prefixes, pending
        ))
        pending = []

    # Avoid needlessly wrapping single statements in an OR
    if len(and_queries) > 1:
        q = query.OrQuery(and_queries)
    else:
        q = and_queries[0]
    s = sort_from_strings(model_cls, sort_terms)
    return q, s

View file

@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Representation of type information for DBCore model fields.
"""
from __future__ import division, absolute_import, print_function
from . import query
from beets.util import str2bool
import six
# Compatibility alias: outside Python 2 the name `buffer` is bound to
# `memoryview`, so the `isinstance(..., buffer)` check in
# `Type.from_sql` works on both major versions.
if not six.PY2:
    buffer = memoryview  # sqlite won't accept memoryview in python 2
# Abstract base.
class Type(object):
    """Describes the type of a model field: how values of that field
    are stored, queried, formatted, and parsed.
    """

    sql = u'TEXT'
    """The SQLite column type for the value.
    """

    query = query.SubstringQuery
    """The `Query` subclass to be used when querying the field.
    """

    model_type = six.text_type
    """The Python type that is used to represent the value in the model.

    The model is guaranteed to return a value of this type if the field
    is accessed.  To this end, the constructor is used by the `normalize`
    and `from_sql` methods and the `default` property.
    """

    @property
    def null(self):
        """The value to be exposed when the underlying value is None:
        `model_type` called with no arguments.
        """
        return self.model_type()

    def format(self, value):
        """Return a Unicode string representing `value`, a value of this
        type. This is used in template evaluation.
        """
        shown = self.null if value is None else value
        if shown is None:
            # `self.null` might be `None` too.
            shown = u''
        elif isinstance(shown, bytes):
            shown = shown.decode('utf-8', 'ignore')
        return six.text_type(shown)

    def parse(self, string):
        """Parse a (possibly human-written) string into a value of this
        type, falling back to `null` when `model_type` rejects it with a
        ValueError.
        """
        try:
            parsed = self.model_type(string)
        except ValueError:
            return self.null
        return parsed

    def normalize(self, value):
        """Given a value that will be assigned into a field of this
        type, normalize the value to have the appropriate type. This
        base implementation only reinterprets `None`.
        """
        # TODO The pass-through branch should eventually be replaced by
        # `self.model_type(value)`
        return self.null if value is None else value

    def from_sql(self, sql_value):
        """Receives the value stored in the SQL backend and return the
        value to be stored in the model.

        For fixed fields the type of `value` is determined by the column
        type affinity given in the `sql` property and the SQL to Python
        mapping of the database adapter. For more information see:
        http://www.sqlite.org/datatype3.html
        https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types

        Flexible fields have the type affinity `TEXT`. This means the
        `sql_value` is either a `buffer`/`memoryview` or a `unicode`
        object and the method must handle these in addition.
        """
        incoming = sql_value
        if isinstance(incoming, buffer):
            incoming = bytes(incoming).decode('utf-8', 'ignore')
        # Text goes through `parse`; anything else is only normalized.
        if isinstance(incoming, six.text_type):
            return self.parse(incoming)
        return self.normalize(incoming)

    def to_sql(self, model_value):
        """Convert a value as stored in the model object to a value used
        by the database adapter. The base implementation returns it
        unchanged.
        """
        return model_value
# Reusable types.
class Default(Type):
    """The base `Type` with `None` as its null value."""

    null = None
class Integer(Type):
    """A plain integer type, stored in an INTEGER column and queried
    with `NumericQuery`.
    """

    sql = u'INTEGER'
    query = query.NumericQuery
    model_type = int
class PaddedInt(Integer):
    """An integer field whose formatted form is zero-padded to a fixed
    number of digits.
    """

    def __init__(self, digits):
        self.digits = digits

    def format(self, value):
        """Return `value` (or 0 when it is falsy) padded with zeroes to
        `self.digits` digits.
        """
        number = value or 0
        return u'{0:0{1}d}'.format(number, self.digits)
class ScaledInt(Integer):
    """An integer that is formatted by scaling it down by a constant and
    appending a suffix. Good for units with large magnitudes.
    """

    def __init__(self, unit, suffix=u''):
        self.unit = unit
        self.suffix = suffix

    def format(self, value):
        """Return `value` (or 0 when it is falsy) floor-divided by
        `self.unit`, followed by `self.suffix`.
        """
        scaled = (value or 0) // self.unit
        return u'{0}{1}'.format(scaled, self.suffix)
class Id(Integer):
    """An integer used as the row id or a foreign key in a SQLite table.
    This type is nullable: None values are not translated to zero.
    """

    null = None

    def __init__(self, primary=True):
        """With `primary` set, declare the column as the table's
        INTEGER PRIMARY KEY; otherwise keep the inherited column type.
        """
        if not primary:
            return
        self.sql = u'INTEGER PRIMARY KEY'
class Float(Type):
    """A plain floating-point type, stored in a REAL column and queried
    with `NumericQuery`.
    """

    sql = u'REAL'
    query = query.NumericQuery
    model_type = float

    def format(self, value):
        """Return `value` (or 0.0 when it is falsy) with one decimal
        place.
        """
        number = value or 0.0
        return u'{0:.1f}'.format(number)
class NullFloat(Float):
    """A `Float` whose null value is `None` instead of `0.0`, so `None`
    is not normalized away.
    """

    null = None
class String(Type):
    """A Unicode string type, stored in a TEXT column and queried with
    `SubstringQuery`.
    """

    sql = u'TEXT'
    query = query.SubstringQuery
class Boolean(Type):
    """A boolean type, stored in an INTEGER column and queried with
    `BooleanQuery`.
    """

    sql = u'INTEGER'
    query = query.BooleanQuery
    model_type = bool

    def format(self, value):
        """Return the text form of `bool(value)`."""
        truth = bool(value)
        return six.text_type(truth)

    def parse(self, string):
        """Interpret `string` as a boolean using `str2bool`."""
        return str2bool(string)
# Shared instances of common types.
# Each name is bound once, at import time, to a single instance of the
# corresponding type class above. PRIMARY_ID is an `Id` whose column is
# declared INTEGER PRIMARY KEY; FOREIGN_ID is an `Id` that keeps the
# plain INTEGER column type.
DEFAULT = Default()
INTEGER = Integer()
PRIMARY_ID = Id(True)
FOREIGN_ID = Id(False)
FLOAT = Float()
NULL_FLOAT = NullFloat()
STRING = String()
BOOLEAN = Boolean()