nzbToMedia/libs/common/beets/dbcore/query.py
2018-12-16 13:50:27 -05:00

944 lines
29 KiB
Python

# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""The Query type hierarchy for DBCore.
"""
from __future__ import division, absolute_import, print_function
import re
from operator import mul
from beets import util
from datetime import datetime, timedelta
import unicodedata
from functools import reduce
import six
if not six.PY2:
buffer = memoryview # sqlite won't accept memoryview in python 2
class ParsingError(ValueError):
"""Abstract class for any unparseable user-requested album/query
specification.
"""
class InvalidQueryError(ParsingError):
"""Represent any kind of invalid query.
The query should be a unicode string or a list, which will be space-joined.
"""
def __init__(self, query, explanation):
if isinstance(query, list):
query = " ".join(query)
message = u"'{0}': {1}".format(query, explanation)
super(InvalidQueryError, self).__init__(message)
class InvalidQueryArgumentValueError(ParsingError):
"""Represent a query argument that could not be converted as expected.
It exists to be caught in upper stack levels so a meaningful (i.e. with the
query) InvalidQueryError can be raised.
"""
def __init__(self, what, expected, detail=None):
message = u"'{0}' is not {1}".format(what, expected)
if detail:
message = u"{0}: {1}".format(message, detail)
super(InvalidQueryArgumentValueError, self).__init__(message)
class Query(object):
"""An abstract class representing a query into the item database.
"""
def clause(self):
"""Generate an SQLite expression implementing the query.
Return (clause, subvals) where clause is a valid sqlite
WHERE clause implementing the query and subvals is a list of
items to be substituted for ?s in the clause.
"""
return None, ()
def match(self, item):
"""Check whether this query matches a given Item. Can be used to
perform queries on arbitrary sets of Items.
"""
raise NotImplementedError
def __repr__(self):
return "{0.__class__.__name__}()".format(self)
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return 0
class FieldQuery(Query):
"""An abstract query that searches in a specific field for a
pattern. Subclasses must provide a `value_match` class method, which
determines whether a certain pattern string matches a certain value
string. Subclasses may also provide `col_clause` to implement the
same matching functionality in SQLite.
"""
def __init__(self, field, pattern, fast=True):
self.field = field
self.pattern = pattern
self.fast = fast
def col_clause(self):
return None, ()
def clause(self):
if self.fast:
return self.col_clause()
else:
# Matching a flexattr. This is a slow query.
return None, ()
@classmethod
def value_match(cls, pattern, value):
"""Determine whether the value matches the pattern. Both
arguments are strings.
"""
raise NotImplementedError()
def match(self, item):
return self.value_match(self.pattern, item.get(self.field))
def __repr__(self):
return ("{0.__class__.__name__}({0.field!r}, {0.pattern!r}, "
"{0.fast})".format(self))
def __eq__(self, other):
return super(FieldQuery, self).__eq__(other) and \
self.field == other.field and self.pattern == other.pattern
def __hash__(self):
return hash((self.field, hash(self.pattern)))
class MatchQuery(FieldQuery):
"""A query that looks for exact matches in an item field."""
def col_clause(self):
return self.field + " = ?", [self.pattern]
@classmethod
def value_match(cls, pattern, value):
return pattern == value
class NoneQuery(FieldQuery):
"""A query that checks whether a field is null."""
def __init__(self, field, fast=True):
super(NoneQuery, self).__init__(field, None, fast)
def col_clause(self):
return self.field + " IS NULL", ()
@classmethod
def match(cls, item):
try:
return item[cls.field] is None
except KeyError:
return True
def __repr__(self):
return "{0.__class__.__name__}({0.field!r}, {0.fast})".format(self)
class StringFieldQuery(FieldQuery):
"""A FieldQuery that converts values to strings before matching
them.
"""
@classmethod
def value_match(cls, pattern, value):
"""Determine whether the value matches the pattern. The value
may have any type.
"""
return cls.string_match(pattern, util.as_string(value))
@classmethod
def string_match(cls, pattern, value):
"""Determine whether the value matches the pattern. Both
arguments are strings. Subclasses implement this method.
"""
raise NotImplementedError()
class SubstringQuery(StringFieldQuery):
"""A query that matches a substring in a specific item field."""
def col_clause(self):
pattern = (self.pattern
.replace('\\', '\\\\')
.replace('%', '\\%')
.replace('_', '\\_'))
search = '%' + pattern + '%'
clause = self.field + " like ? escape '\\'"
subvals = [search]
return clause, subvals
@classmethod
def string_match(cls, pattern, value):
return pattern.lower() in value.lower()
class RegexpQuery(StringFieldQuery):
"""A query that matches a regular expression in a specific item
field.
Raises InvalidQueryError when the pattern is not a valid regular
expression.
"""
def __init__(self, field, pattern, fast=True):
super(RegexpQuery, self).__init__(field, pattern, fast)
pattern = self._normalize(pattern)
try:
self.pattern = re.compile(self.pattern)
except re.error as exc:
# Invalid regular expression.
raise InvalidQueryArgumentValueError(pattern,
u"a regular expression",
format(exc))
@staticmethod
def _normalize(s):
"""Normalize a Unicode string's representation (used on both
patterns and matched values).
"""
return unicodedata.normalize('NFC', s)
@classmethod
def string_match(cls, pattern, value):
return pattern.search(cls._normalize(value)) is not None
class BooleanQuery(MatchQuery):
"""Matches a boolean field. Pattern should either be a boolean or a
string reflecting a boolean.
"""
def __init__(self, field, pattern, fast=True):
super(BooleanQuery, self).__init__(field, pattern, fast)
if isinstance(pattern, six.string_types):
self.pattern = util.str2bool(pattern)
self.pattern = int(self.pattern)
class BytesQuery(MatchQuery):
"""Match a raw bytes field (i.e., a path). This is a necessary hack
to work around the `sqlite3` module's desire to treat `bytes` and
`unicode` equivalently in Python 2. Always use this query instead of
`MatchQuery` when matching on BLOB values.
"""
def __init__(self, field, pattern):
super(BytesQuery, self).__init__(field, pattern)
# Use a buffer/memoryview representation of the pattern for SQLite
# matching. This instructs SQLite to treat the blob as binary
# rather than encoded Unicode.
if isinstance(self.pattern, (six.text_type, bytes)):
if isinstance(self.pattern, six.text_type):
self.pattern = self.pattern.encode('utf-8')
self.buf_pattern = buffer(self.pattern)
elif isinstance(self.pattern, buffer):
self.buf_pattern = self.pattern
self.pattern = bytes(self.pattern)
def col_clause(self):
return self.field + " = ?", [self.buf_pattern]
class NumericQuery(FieldQuery):
"""Matches numeric fields. A syntax using Ruby-style range ellipses
(``..``) lets users specify one- or two-sided ranges. For example,
``year:2001..`` finds music released since the turn of the century.
Raises InvalidQueryError when the pattern does not represent an int or
a float.
"""
def _convert(self, s):
"""Convert a string to a numeric type (float or int).
Return None if `s` is empty.
Raise an InvalidQueryError if the string cannot be converted.
"""
# This is really just a bit of fun premature optimization.
if not s:
return None
try:
return int(s)
except ValueError:
try:
return float(s)
except ValueError:
raise InvalidQueryArgumentValueError(s, u"an int or a float")
def __init__(self, field, pattern, fast=True):
super(NumericQuery, self).__init__(field, pattern, fast)
parts = pattern.split('..', 1)
if len(parts) == 1:
# No range.
self.point = self._convert(parts[0])
self.rangemin = None
self.rangemax = None
else:
# One- or two-sided range.
self.point = None
self.rangemin = self._convert(parts[0])
self.rangemax = self._convert(parts[1])
def match(self, item):
if self.field not in item:
return False
value = item[self.field]
if isinstance(value, six.string_types):
value = self._convert(value)
if self.point is not None:
return value == self.point
else:
if self.rangemin is not None and value < self.rangemin:
return False
if self.rangemax is not None and value > self.rangemax:
return False
return True
def col_clause(self):
if self.point is not None:
return self.field + '=?', (self.point,)
else:
if self.rangemin is not None and self.rangemax is not None:
return (u'{0} >= ? AND {0} <= ?'.format(self.field),
(self.rangemin, self.rangemax))
elif self.rangemin is not None:
return u'{0} >= ?'.format(self.field), (self.rangemin,)
elif self.rangemax is not None:
return u'{0} <= ?'.format(self.field), (self.rangemax,)
else:
return u'1', ()
class CollectionQuery(Query):
"""An abstract query class that aggregates other queries. Can be
indexed like a list to access the sub-queries.
"""
def __init__(self, subqueries=()):
self.subqueries = subqueries
# Act like a sequence.
def __len__(self):
return len(self.subqueries)
def __getitem__(self, key):
return self.subqueries[key]
def __iter__(self):
return iter(self.subqueries)
def __contains__(self, item):
return item in self.subqueries
def clause_with_joiner(self, joiner):
"""Return a clause created by joining together the clauses of
all subqueries with the string joiner (padded by spaces).
"""
clause_parts = []
subvals = []
for subq in self.subqueries:
subq_clause, subq_subvals = subq.clause()
if not subq_clause:
# Fall back to slow query.
return None, ()
clause_parts.append('(' + subq_clause + ')')
subvals += subq_subvals
clause = (' ' + joiner + ' ').join(clause_parts)
return clause, subvals
def __repr__(self):
return "{0.__class__.__name__}({0.subqueries!r})".format(self)
def __eq__(self, other):
return super(CollectionQuery, self).__eq__(other) and \
self.subqueries == other.subqueries
def __hash__(self):
"""Since subqueries are mutable, this object should not be hashable.
However and for conveniences purposes, it can be hashed.
"""
return reduce(mul, map(hash, self.subqueries), 1)
class AnyFieldQuery(CollectionQuery):
"""A query that matches if a given FieldQuery subclass matches in
any field. The individual field query class is provided to the
constructor.
"""
def __init__(self, pattern, fields, cls):
self.pattern = pattern
self.fields = fields
self.query_class = cls
subqueries = []
for field in self.fields:
subqueries.append(cls(field, pattern, True))
super(AnyFieldQuery, self).__init__(subqueries)
def clause(self):
return self.clause_with_joiner('or')
def match(self, item):
for subq in self.subqueries:
if subq.match(item):
return True
return False
def __repr__(self):
return ("{0.__class__.__name__}({0.pattern!r}, {0.fields!r}, "
"{0.query_class.__name__})".format(self))
def __eq__(self, other):
return super(AnyFieldQuery, self).__eq__(other) and \
self.query_class == other.query_class
def __hash__(self):
return hash((self.pattern, tuple(self.fields), self.query_class))
class MutableCollectionQuery(CollectionQuery):
"""A collection query whose subqueries may be modified after the
query is initialized.
"""
def __setitem__(self, key, value):
self.subqueries[key] = value
def __delitem__(self, key):
del self.subqueries[key]
class AndQuery(MutableCollectionQuery):
"""A conjunction of a list of other queries."""
def clause(self):
return self.clause_with_joiner('and')
def match(self, item):
return all([q.match(item) for q in self.subqueries])
class OrQuery(MutableCollectionQuery):
"""A conjunction of a list of other queries."""
def clause(self):
return self.clause_with_joiner('or')
def match(self, item):
return any([q.match(item) for q in self.subqueries])
class NotQuery(Query):
"""A query that matches the negation of its `subquery`, as a shorcut for
performing `not(subquery)` without using regular expressions.
"""
def __init__(self, subquery):
self.subquery = subquery
def clause(self):
clause, subvals = self.subquery.clause()
if clause:
return 'not ({0})'.format(clause), subvals
else:
# If there is no clause, there is nothing to negate. All the logic
# is handled by match() for slow queries.
return clause, subvals
def match(self, item):
return not self.subquery.match(item)
def __repr__(self):
return "{0.__class__.__name__}({0.subquery!r})".format(self)
def __eq__(self, other):
return super(NotQuery, self).__eq__(other) and \
self.subquery == other.subquery
def __hash__(self):
return hash(('not', hash(self.subquery)))
class TrueQuery(Query):
"""A query that always matches."""
def clause(self):
return '1', ()
def match(self, item):
return True
class FalseQuery(Query):
"""A query that never matches."""
def clause(self):
return '0', ()
def match(self, item):
return False
# Time/date queries.
def _to_epoch_time(date):
"""Convert a `datetime` object to an integer number of seconds since
the (local) Unix epoch.
"""
if hasattr(date, 'timestamp'):
# The `timestamp` method exists on Python 3.3+.
return int(date.timestamp())
else:
epoch = datetime.fromtimestamp(0)
delta = date - epoch
return int(delta.total_seconds())
def _parse_periods(pattern):
"""Parse a string containing two dates separated by two dots (..).
Return a pair of `Period` objects.
"""
parts = pattern.split('..', 1)
if len(parts) == 1:
instant = Period.parse(parts[0])
return (instant, instant)
else:
start = Period.parse(parts[0])
end = Period.parse(parts[1])
return (start, end)
class Period(object):
"""A period of time given by a date, time and precision.
Example: 2014-01-01 10:50:30 with precision 'month' represents all
instants of time during January 2014.
"""
precisions = ('year', 'month', 'day', 'hour', 'minute', 'second')
date_formats = (
('%Y',), # year
('%Y-%m',), # month
('%Y-%m-%d',), # day
('%Y-%m-%dT%H', '%Y-%m-%d %H'), # hour
('%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M'), # minute
('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S') # second
)
relative_units = {'y': 365, 'm': 30, 'w': 7, 'd': 1}
relative_re = '(?P<sign>[+|-]?)(?P<quantity>[0-9]+)' + \
'(?P<timespan>[y|m|w|d])'
def __init__(self, date, precision):
"""Create a period with the given date (a `datetime` object) and
precision (a string, one of "year", "month", "day", "hour", "minute",
or "second").
"""
if precision not in Period.precisions:
raise ValueError(u'Invalid precision {0}'.format(precision))
self.date = date
self.precision = precision
@classmethod
def parse(cls, string):
"""Parse a date and return a `Period` object or `None` if the
string is empty, or raise an InvalidQueryArgumentValueError if
the string cannot be parsed to a date.
The date may be absolute or relative. Absolute dates look like
`YYYY`, or `YYYY-MM-DD`, or `YYYY-MM-DD HH:MM:SS`, etc. Relative
dates have three parts:
- Optionally, a ``+`` or ``-`` sign indicating the future or the
past. The default is the future.
- A number: how much to add or subtract.
- A letter indicating the unit: days, weeks, months or years
(``d``, ``w``, ``m`` or ``y``). A "month" is exactly 30 days
and a "year" is exactly 365 days.
"""
def find_date_and_format(string):
for ord, format in enumerate(cls.date_formats):
for format_option in format:
try:
date = datetime.strptime(string, format_option)
return date, ord
except ValueError:
# Parsing failed.
pass
return (None, None)
if not string:
return None
# Check for a relative date.
match_dq = re.match(cls.relative_re, string)
if match_dq:
sign = match_dq.group('sign')
quantity = match_dq.group('quantity')
timespan = match_dq.group('timespan')
# Add or subtract the given amount of time from the current
# date.
multiplier = -1 if sign == '-' else 1
days = cls.relative_units[timespan]
date = datetime.now() + \
timedelta(days=int(quantity) * days) * multiplier
return cls(date, cls.precisions[5])
# Check for an absolute date.
date, ordinal = find_date_and_format(string)
if date is None:
raise InvalidQueryArgumentValueError(string,
'a valid date/time string')
precision = cls.precisions[ordinal]
return cls(date, precision)
def open_right_endpoint(self):
"""Based on the precision, convert the period to a precise
`datetime` for use as a right endpoint in a right-open interval.
"""
precision = self.precision
date = self.date
if 'year' == self.precision:
return date.replace(year=date.year + 1, month=1)
elif 'month' == precision:
if (date.month < 12):
return date.replace(month=date.month + 1)
else:
return date.replace(year=date.year + 1, month=1)
elif 'day' == precision:
return date + timedelta(days=1)
elif 'hour' == precision:
return date + timedelta(hours=1)
elif 'minute' == precision:
return date + timedelta(minutes=1)
elif 'second' == precision:
return date + timedelta(seconds=1)
else:
raise ValueError(u'unhandled precision {0}'.format(precision))
class DateInterval(object):
"""A closed-open interval of dates.
A left endpoint of None means since the beginning of time.
A right endpoint of None means towards infinity.
"""
def __init__(self, start, end):
if start is not None and end is not None and not start < end:
raise ValueError(u"start date {0} is not before end date {1}"
.format(start, end))
self.start = start
self.end = end
@classmethod
def from_periods(cls, start, end):
"""Create an interval with two Periods as the endpoints.
"""
end_date = end.open_right_endpoint() if end is not None else None
start_date = start.date if start is not None else None
return cls(start_date, end_date)
def contains(self, date):
if self.start is not None and date < self.start:
return False
if self.end is not None and date >= self.end:
return False
return True
def __str__(self):
return '[{0}, {1})'.format(self.start, self.end)
class DateQuery(FieldQuery):
"""Matches date fields stored as seconds since Unix epoch time.
Dates can be specified as ``year-month-day`` strings where only year
is mandatory.
The value of a date field can be matched against a date interval by
using an ellipsis interval syntax similar to that of NumericQuery.
"""
def __init__(self, field, pattern, fast=True):
super(DateQuery, self).__init__(field, pattern, fast)
start, end = _parse_periods(pattern)
self.interval = DateInterval.from_periods(start, end)
def match(self, item):
if self.field not in item:
return False
timestamp = float(item[self.field])
date = datetime.fromtimestamp(timestamp)
return self.interval.contains(date)
_clause_tmpl = "{0} {1} ?"
def col_clause(self):
clause_parts = []
subvals = []
if self.interval.start:
clause_parts.append(self._clause_tmpl.format(self.field, ">="))
subvals.append(_to_epoch_time(self.interval.start))
if self.interval.end:
clause_parts.append(self._clause_tmpl.format(self.field, "<"))
subvals.append(_to_epoch_time(self.interval.end))
if clause_parts:
# One- or two-sided interval.
clause = ' AND '.join(clause_parts)
else:
# Match any date.
clause = '1'
return clause, subvals
class DurationQuery(NumericQuery):
"""NumericQuery that allow human-friendly (M:SS) time interval formats.
Converts the range(s) to a float value, and delegates on NumericQuery.
Raises InvalidQueryError when the pattern does not represent an int, float
or M:SS time interval.
"""
def _convert(self, s):
"""Convert a M:SS or numeric string to a float.
Return None if `s` is empty.
Raise an InvalidQueryError if the string cannot be converted.
"""
if not s:
return None
try:
return util.raw_seconds_short(s)
except ValueError:
try:
return float(s)
except ValueError:
raise InvalidQueryArgumentValueError(
s,
u"a M:SS string or a float")
# Sorting.
class Sort(object):
"""An abstract class representing a sort operation for a query into
the item database.
"""
def order_clause(self):
"""Generates a SQL fragment to be used in a ORDER BY clause, or
None if no fragment is used (i.e., this is a slow sort).
"""
return None
def sort(self, items):
"""Sort the list of objects and return a list.
"""
return sorted(items)
def is_slow(self):
"""Indicate whether this query is *slow*, meaning that it cannot
be executed in SQL and must be executed in Python.
"""
return False
def __hash__(self):
return 0
def __eq__(self, other):
return type(self) == type(other)
class MultipleSort(Sort):
"""Sort that encapsulates multiple sub-sorts.
"""
def __init__(self, sorts=None):
self.sorts = sorts or []
def add_sort(self, sort):
self.sorts.append(sort)
def _sql_sorts(self):
"""Return the list of sub-sorts for which we can be (at least
partially) fast.
A contiguous suffix of fast (SQL-capable) sub-sorts are
executable in SQL. The remaining, even if they are fast
independently, must be executed slowly.
"""
sql_sorts = []
for sort in reversed(self.sorts):
if not sort.order_clause() is None:
sql_sorts.append(sort)
else:
break
sql_sorts.reverse()
return sql_sorts
def order_clause(self):
order_strings = []
for sort in self._sql_sorts():
order = sort.order_clause()
order_strings.append(order)
return ", ".join(order_strings)
def is_slow(self):
for sort in self.sorts:
if sort.is_slow():
return True
return False
def sort(self, items):
slow_sorts = []
switch_slow = False
for sort in reversed(self.sorts):
if switch_slow:
slow_sorts.append(sort)
elif sort.order_clause() is None:
switch_slow = True
slow_sorts.append(sort)
else:
pass
for sort in slow_sorts:
items = sort.sort(items)
return items
def __repr__(self):
return 'MultipleSort({!r})'.format(self.sorts)
def __hash__(self):
return hash(tuple(self.sorts))
def __eq__(self, other):
return super(MultipleSort, self).__eq__(other) and \
self.sorts == other.sorts
class FieldSort(Sort):
"""An abstract sort criterion that orders by a specific field (of
any kind).
"""
def __init__(self, field, ascending=True, case_insensitive=True):
self.field = field
self.ascending = ascending
self.case_insensitive = case_insensitive
def sort(self, objs):
# TODO: Conversion and null-detection here. In Python 3,
# comparisons with None fail. We should also support flexible
# attributes with different types without falling over.
def key(item):
field_val = item.get(self.field, '')
if self.case_insensitive and isinstance(field_val, six.text_type):
field_val = field_val.lower()
return field_val
return sorted(objs, key=key, reverse=not self.ascending)
def __repr__(self):
return '<{0}: {1}{2}>'.format(
type(self).__name__,
self.field,
'+' if self.ascending else '-',
)
def __hash__(self):
return hash((self.field, self.ascending))
def __eq__(self, other):
return super(FieldSort, self).__eq__(other) and \
self.field == other.field and \
self.ascending == other.ascending
class FixedFieldSort(FieldSort):
"""Sort object to sort on a fixed field.
"""
def order_clause(self):
order = "ASC" if self.ascending else "DESC"
if self.case_insensitive:
field = '(CASE ' \
'WHEN TYPEOF({0})="text" THEN LOWER({0}) ' \
'WHEN TYPEOF({0})="blob" THEN LOWER({0}) ' \
'ELSE {0} END)'.format(self.field)
else:
field = self.field
return "{0} {1}".format(field, order)
class SlowFieldSort(FieldSort):
"""A sort criterion by some model field other than a fixed field:
i.e., a computed or flexible field.
"""
def is_slow(self):
return True
class NullSort(Sort):
"""No sorting. Leave results unsorted."""
def sort(self, items):
return items
def __nonzero__(self):
return self.__bool__()
def __bool__(self):
return False
def __eq__(self, other):
return type(self) == type(other) or other is None
def __hash__(self):
return 0