Move common libs to libs/common

This commit is contained in:
Labrys of Knossos 2018-12-16 13:30:24 -05:00
commit 1f4bd41bcc
1612 changed files with 962 additions and 10 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Fabrice Laporte
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Abstraction layer to resize images using PIL, ImageMagick, or a
public resizing proxy if neither is available.
"""
from __future__ import division, absolute_import, print_function
import subprocess
import os
import re
from tempfile import NamedTemporaryFile
from six.moves.urllib.parse import urlencode
from beets import logging
from beets import util
import six
# Resizing methods: keys used to select a backend implementation below.
PIL = 1
IMAGEMAGICK = 2
WEBPROXY = 3

# Prefer HTTPS for the resizing proxy when this Python build supports
# SNI; otherwise fall back to plain HTTP.
if util.SNI_SUPPORTED:
    PROXY_URL = 'https://images.weserv.nl/'
else:
    PROXY_URL = 'http://images.weserv.nl/'

log = logging.getLogger('beets')
def resize_url(url, maxwidth):
    """Return a proxied image URL that resizes the original image to
    maxwidth (preserving aspect ratio).
    """
    params = urlencode({
        'url': url.replace('http://', ''),
        'w': maxwidth,
    })
    return '{0}?{1}'.format(PROXY_URL, params)
def temp_file_for(path):
    """Return an unused filename with the same extension as the
    specified path.
    """
    extension = os.path.splitext(path)[1]
    handle = NamedTemporaryFile(suffix=util.py3_path(extension), delete=False)
    with handle as f:
        return util.bytestring_path(f.name)
def pil_resize(maxwidth, path_in, path_out=None):
    """Resize an image with the Python Imaging Library (PIL/Pillow).

    Return the path of the resized image, or ``path_in`` unchanged if
    the resize fails.
    """
    if path_out is None:
        path_out = temp_file_for(path_in)
    from PIL import Image
    log.debug(u'artresizer: PIL resizing {0} to {1}',
              util.displayable_path(path_in), util.displayable_path(path_out))

    try:
        image = Image.open(util.syspath(path_in))
        # Bound both dimensions by maxwidth; thumbnail() keeps aspect.
        image.thumbnail((maxwidth, maxwidth), Image.ANTIALIAS)
        image.save(path_out)
    except IOError:
        log.error(u"PIL cannot create thumbnail for '{0}'",
                  util.displayable_path(path_in))
        return path_in
    return path_out
def im_resize(maxwidth, path_in, path_out=None):
    """Resize an image with ImageMagick's ``convert`` tool.

    Return the path of the resized image, or ``path_in`` unchanged if
    the resize fails.
    """
    if path_out is None:
        path_out = temp_file_for(path_in)
    log.debug(u'artresizer: ImageMagick resizing {0} to {1}',
              util.displayable_path(path_in), util.displayable_path(path_out))

    # "-resize WIDTHx>" shrinks images with the width larger
    # than the given width while maintaining the aspect ratio
    # with regards to the height.
    cmd = [
        'convert', util.syspath(path_in, prefix=False),
        '-resize', '{0}x>'.format(maxwidth),
        util.syspath(path_out, prefix=False),
    ]
    try:
        util.command_output(cmd)
    except subprocess.CalledProcessError:
        log.warning(u'artresizer: IM convert failed for {0}',
                    util.displayable_path(path_in))
        return path_in
    return path_out
# Maps each resizing-method constant to the function implementing it.
BACKEND_FUNCS = {
    PIL: pil_resize,
    IMAGEMAGICK: im_resize,
}
def pil_getsize(path_in):
    """Return the (width, height) of the image at ``path_in`` using
    PIL, or None (implicitly) if the file cannot be read.
    """
    from PIL import Image
    try:
        return Image.open(util.syspath(path_in)).size
    except IOError as exc:
        log.error(u"PIL could not read file {}: {}",
                  util.displayable_path(path_in), exc)
def im_getsize(path_in):
    """Return the (width, height) of the image at ``path_in`` using
    ImageMagick's ``identify`` tool, or None (implicitly) if the query
    or parse fails.
    """
    cmd = ['identify', '-format', '%w %h',
           util.syspath(path_in, prefix=False)]
    try:
        out = util.command_output(cmd)
    except subprocess.CalledProcessError as exc:
        log.warning(u'ImageMagick size query failed')
        log.debug(
            u'`identify` exited with (status {}) when '
            u'getting size with command {}:\n{}',
            exc.returncode, cmd, exc.output.strip()
        )
        return
    try:
        # Parse "WIDTH HEIGHT" into a pair of ints.
        return tuple(map(int, out.split(b' ')))
    except ValueError:
        # BUG FIX: int() raises ValueError (not IndexError) on
        # malformed output, so the original handler could never fire.
        log.warning(u'Could not understand IM output: {0!r}', out)
# Maps each resizing-method constant to its size-querying function.
BACKEND_GET_SIZE = {
    PIL: pil_getsize,
    IMAGEMAGICK: im_getsize,
}
class Shareable(type):
    """A pseudo-singleton metaclass that allows both shared and
    non-shared instances. The ``MyClass.shared`` property holds a
    lazily-created shared instance of ``MyClass`` while calling
    ``MyClass()`` to construct a new object works as usual.
    """
    def __init__(cls, name, bases, namespace):
        super(Shareable, cls).__init__(name, bases, namespace)
        cls._instance = None

    @property
    def shared(cls):
        # Create the shared instance lazily on first access.
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance
class ArtResizer(six.with_metaclass(Shareable, object)):
    """A singleton class that performs image resizes.
    """

    def __init__(self):
        """Create a resizer object with an inferred method.
        """
        self.method = self._check_method()
        log.debug(u"artresizer: method is {0}", self.method)
        self.can_compare = self._can_compare()

    def resize(self, maxwidth, path_in, path_out=None):
        """Manipulate an image file according to the method, returning a
        new path. For PIL or IMAGEMAGIC methods, resizes the image to a
        temporary file. For WEBPROXY, returns `path_in` unmodified.
        """
        if self.local:
            func = BACKEND_FUNCS[self.method[0]]
            return func(maxwidth, path_in, path_out)
        else:
            return path_in

    def proxy_url(self, maxwidth, url):
        """Modifies an image URL according the method, returning a new
        URL. For WEBPROXY, a URL on the proxy server is returned.
        Otherwise, the URL is returned unmodified.
        """
        if self.local:
            return url
        else:
            return resize_url(url, maxwidth)

    @property
    def local(self):
        """A boolean indicating whether the resizing method is performed
        locally (i.e., PIL or ImageMagick).
        """
        return self.method[0] in BACKEND_FUNCS

    def get_size(self, path_in):
        """Return the size of an image file as an int couple (width,
        height) in pixels.

        Only available locally.
        """
        if self.local:
            func = BACKEND_GET_SIZE[self.method[0]]
            return func(path_in)

    def _can_compare(self):
        """A boolean indicating whether image comparison is available."""
        return self.method[0] == IMAGEMAGICK and self.method[1] > (6, 8, 7)

    @staticmethod
    def _check_method():
        """Return a tuple indicating an available method and its version."""
        version = get_im_version()
        if version:
            return IMAGEMAGICK, version

        version = get_pil_version()
        if version:
            return PIL, version

        # BUG FIX: ``(0)`` is just the int 0, not a tuple; use a
        # one-element tuple so the version component has the same type
        # as the tuples the other backends report.
        return WEBPROXY, (0,)
def get_im_version():
    """Return the ImageMagick version as an int tuple, or None if it is
    unavailable. Works by invoking ``convert --version``.
    """
    try:
        out = util.command_output(['convert', '--version'])
    except (subprocess.CalledProcessError, OSError) as exc:
        log.debug(u'ImageMagick check `convert --version` failed: {}', exc)
        return None

    if b'imagemagick' in out.lower():
        match = re.search(br".+ (\d+)\.(\d+)\.(\d+).*", out)
        if match:
            return (int(match.group(1)),
                    int(match.group(2)),
                    int(match.group(3)))
        # ImageMagick is present but the version is unparsable: report
        # a dummy version tuple.
        return (0,)
    # `convert` exists but is not ImageMagick: implicitly return None.
def get_pil_version():
    """Return a placeholder PIL version tuple ``(0,)`` if PIL can be
    imported, or None if it is unavailable.

    (The docstring previously said "Image Magick" by copy-paste
    mistake; this function only probes for PIL.)
    """
    try:
        __import__('PIL', fromlist=[str('Image')])
        return (0,)
    except ImportError:
        return None

View file

@ -0,0 +1,638 @@
# -*- coding: utf-8 -*-
"""Extremely simple pure-Python implementation of coroutine-style
asynchronous socket I/O. Inspired by, but inferior to, Eventlet.
Bluelet can also be thought of as a less-terrible replacement for
asyncore.
Bluelet: easy concurrency without all the messy parallelism.
"""
from __future__ import division, absolute_import, print_function
import six
import socket
import select
import sys
import types
import errno
import traceback
import time
import collections
# Basic events used for thread scheduling.
class Event(object):
    """Base class identifying Bluelet events. An event is an object
    yielded from a Bluelet thread coroutine to suspend operation and
    communicate with the scheduler.
    """
class WaitableEvent(Event):
    """A waitable event is one encapsulating an action that can be
    waited for using a select() call. That is, it's an event with an
    associated file descriptor.
    """
    def waitables(self):
        """Return "waitable" objects to pass to select(). Should return
        three iterables for input readiness, output readiness, and
        exceptional conditions (i.e., the three lists passed to
        select()).
        """
        # By default, no file descriptors to wait on.
        return (), (), ()

    def fire(self):
        """Called when an associated file descriptor becomes ready
        (i.e., is returned from a select() call).
        """
        pass
class ValueEvent(Event):
    """An event that does nothing but return a fixed value."""
    def __init__(self, value):
        # The value handed back to the yielding coroutine.
        self.value = value
class ExceptionEvent(Event):
    """Raise an exception at the yield point. Used internally."""
    def __init__(self, exc_info):
        # A (type, value, traceback) triple to throw into the coroutine.
        self.exc_info = exc_info
class SpawnEvent(Event):
    """Add a new coroutine thread to the scheduler."""
    def __init__(self, coro):
        # The coroutine to schedule concurrently with its parent.
        self.spawned = coro
class JoinEvent(Event):
    """Suspend the thread until the specified child thread has
    completed.
    """
    def __init__(self, child):
        # The coroutine whose completion is awaited.
        self.child = child
class KillEvent(Event):
    """Unschedule a child thread."""
    def __init__(self, child):
        # The coroutine to be removed from the scheduler.
        self.child = child
class DelegationEvent(Event):
    """Suspend execution of the current thread, start a new thread and,
    once the child thread finished, return control to the parent
    thread.
    """
    def __init__(self, coro):
        # The sub-coroutine to run on this thread's behalf.
        self.spawned = coro
class ReturnEvent(Event):
    """Return a value the current thread's delegator at the point of
    delegation. Ends the current (delegate) thread.
    """
    def __init__(self, value):
        # The value passed back to the delegating parent.
        self.value = value
class SleepEvent(WaitableEvent):
    """Suspend the thread for a given duration.
    """
    def __init__(self, duration):
        # Absolute time at which the thread should resume.
        self.wakeup_time = time.time() + duration

    def time_left(self):
        # Seconds remaining until wakeup (never negative).
        return max(self.wakeup_time - time.time(), 0.0)
class ReadEvent(WaitableEvent):
    """Reads from a file-like object."""
    def __init__(self, fd, bufsize):
        self.fd = fd
        self.bufsize = bufsize

    def waitables(self):
        # Wait for the fd to become readable.
        return (self.fd,), (), ()

    def fire(self):
        return self.fd.read(self.bufsize)
class WriteEvent(WaitableEvent):
    """Writes to a file-like object."""
    def __init__(self, fd, data):
        self.fd = fd
        self.data = data

    def waitables(self):
        # BUG FIX: this method was misspelled ``waitable``, so it never
        # overrode WaitableEvent.waitables(); the scheduler then saw no
        # file descriptors for write events and never fired them.
        return (), (self.fd,), ()

    def fire(self):
        self.fd.write(self.data)
# Core logic for executing and scheduling threads.
def _event_select(events):
    """Perform a select() over all the Events provided, returning the
    ones ready to be fired. Only WaitableEvents (including SleepEvents)
    matter here; all other events are ignored (and thus postponed).
    """
    # Gather waitables and wakeup times.
    waitable_to_event = {}
    rlist, wlist, xlist = [], [], []
    earliest_wakeup = None
    for event in events:
        if isinstance(event, SleepEvent):
            if not earliest_wakeup:
                earliest_wakeup = event.wakeup_time
            else:
                earliest_wakeup = min(earliest_wakeup, event.wakeup_time)
        elif isinstance(event, WaitableEvent):
            r, w, x = event.waitables()
            rlist += r
            wlist += w
            xlist += x
            # Remember which event owns each waitable, keyed by the
            # readiness kind so one fd can appear in several lists.
            for waitable in r:
                waitable_to_event[('r', waitable)] = event
            for waitable in w:
                waitable_to_event[('w', waitable)] = event
            for waitable in x:
                waitable_to_event[('x', waitable)] = event

    # If we have any sleeping threads, determine how long to sleep.
    if earliest_wakeup:
        timeout = max(earliest_wakeup - time.time(), 0.0)
    else:
        timeout = None

    # Perform select() if we have any waitables.
    if rlist or wlist or xlist:
        rready, wready, xready = select.select(rlist, wlist, xlist, timeout)
    else:
        rready, wready, xready = (), (), ()
        # Nothing to select on: just sleep until the earliest wakeup.
        if timeout:
            time.sleep(timeout)

    # Gather ready events corresponding to the ready waitables.
    ready_events = set()
    for ready in rready:
        ready_events.add(waitable_to_event[('r', ready)])
    for ready in wready:
        ready_events.add(waitable_to_event[('w', ready)])
    for ready in xready:
        ready_events.add(waitable_to_event[('x', ready)])

    # Gather any finished sleeps.
    for event in events:
        if isinstance(event, SleepEvent) and event.time_left() == 0.0:
            ready_events.add(event)

    return ready_events
class ThreadException(Exception):
    """Wraps an exception that escaped a coroutine, together with the
    coroutine it escaped from, so the scheduler can route it.
    """
    def __init__(self, coro, exc_info):
        self.coro = coro
        self.exc_info = exc_info

    def reraise(self):
        """Re-raise the wrapped exception with its original traceback."""
        six.reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2])
SUSPENDED = Event()  # Special sentinel placeholder for suspended threads.


class Delegated(Event):
    """Placeholder indicating that a thread has delegated execution to a
    different thread.
    """
    def __init__(self, child):
        # The coroutine now running on this thread's behalf.
        self.child = child
def run(root_coro):
    """Schedules a coroutine, running it to completion. This
    encapsulates the Bluelet scheduler, which the root coroutine can
    add to by spawning new coroutines.
    """
    # The "threads" dictionary keeps track of all the currently-
    # executing and suspended coroutines. It maps coroutines to their
    # currently "blocking" event. The event value may be SUSPENDED if
    # the coroutine is waiting on some other condition: namely, a
    # delegated coroutine or a joined coroutine. In this case, the
    # coroutine should *also* appear as a value in one of the below
    # dictionaries `delegators` or `joiners`.
    threads = {root_coro: ValueEvent(None)}

    # Maps child coroutines to delegating parents.
    delegators = {}

    # Maps child coroutines to joining (exit-waiting) parents.
    joiners = collections.defaultdict(list)

    def complete_thread(coro, return_value):
        """Remove a coroutine from the scheduling pool, awaking
        delegators and joiners as necessary and returning the specified
        value to any delegating parent.
        """
        del threads[coro]

        # Resume delegator.
        if coro in delegators:
            threads[delegators[coro]] = ValueEvent(return_value)
            del delegators[coro]

        # Resume joiners.
        if coro in joiners:
            for parent in joiners[coro]:
                threads[parent] = ValueEvent(None)
            del joiners[coro]

    def advance_thread(coro, value, is_exc=False):
        """After an event is fired, run a given coroutine associated with
        it in the threads dict until it yields again. If the coroutine
        exits, then the thread is removed from the pool. If the coroutine
        raises an exception, it is reraised in a ThreadException. If
        is_exc is True, then the value must be an exc_info tuple and the
        exception is thrown into the coroutine.
        """
        try:
            if is_exc:
                next_event = coro.throw(*value)
            else:
                next_event = coro.send(value)
        except StopIteration:
            # Thread is done.
            complete_thread(coro, None)
        except BaseException:
            # Thread raised some other exception.
            del threads[coro]
            raise ThreadException(coro, sys.exc_info())
        else:
            if isinstance(next_event, types.GeneratorType):
                # Automatically invoke sub-coroutines. (Shorthand for
                # explicit bluelet.call().)
                next_event = DelegationEvent(next_event)
            threads[coro] = next_event

    def kill_thread(coro):
        """Unschedule this thread and its (recursive) delegates.
        """
        # Collect all coroutines in the delegation stack.
        coros = [coro]
        while isinstance(threads[coro], Delegated):
            coro = threads[coro].child
            coros.append(coro)

        # Complete each coroutine from the top to the bottom of the
        # stack.
        for coro in reversed(coros):
            complete_thread(coro, None)

    # Continue advancing threads until root thread exits.
    exit_te = None
    while threads:
        try:
            # Look for events that can be run immediately. Continue
            # running immediate events until nothing is ready.
            while True:
                have_ready = False
                for coro, event in list(threads.items()):
                    if isinstance(event, SpawnEvent):
                        threads[event.spawned] = ValueEvent(None)  # Spawn.
                        advance_thread(coro, None)
                        have_ready = True
                    elif isinstance(event, ValueEvent):
                        advance_thread(coro, event.value)
                        have_ready = True
                    elif isinstance(event, ExceptionEvent):
                        advance_thread(coro, event.exc_info, True)
                        have_ready = True
                    elif isinstance(event, DelegationEvent):
                        threads[coro] = Delegated(event.spawned)  # Suspend.
                        threads[event.spawned] = ValueEvent(None)  # Spawn.
                        delegators[event.spawned] = coro
                        have_ready = True
                    elif isinstance(event, ReturnEvent):
                        # Thread is done.
                        complete_thread(coro, event.value)
                        have_ready = True
                    elif isinstance(event, JoinEvent):
                        threads[coro] = SUSPENDED  # Suspend.
                        joiners[event.child].append(coro)
                        have_ready = True
                    elif isinstance(event, KillEvent):
                        threads[coro] = ValueEvent(None)
                        kill_thread(event.child)
                        have_ready = True

                # Only start the select when nothing else is ready.
                if not have_ready:
                    break

            # Wait and fire.
            event2coro = dict((v, k) for k, v in threads.items())
            for event in _event_select(threads.values()):
                # Run the IO operation, but catch socket errors.
                try:
                    value = event.fire()
                except socket.error as exc:
                    if isinstance(exc.args, tuple) and \
                            exc.args[0] == errno.EPIPE:
                        # Broken pipe. Remote host disconnected.
                        pass
                    else:
                        traceback.print_exc()
                    # Abort the coroutine.
                    threads[event2coro[event]] = ReturnEvent(None)
                else:
                    advance_thread(event2coro[event], value)

        except ThreadException as te:
            # Exception raised from inside a thread.
            event = ExceptionEvent(te.exc_info)
            if te.coro in delegators:
                # The thread is a delegate. Raise exception in its
                # delegator.
                threads[delegators[te.coro]] = event
                del delegators[te.coro]
            else:
                # The thread is root-level. Raise in client code.
                exit_te = te
                break

        except BaseException:
            # For instance, KeyboardInterrupt during select(). Raise
            # into root thread and terminate others.
            threads = {root_coro: ExceptionEvent(sys.exc_info())}

    # If any threads still remain, kill them.
    for coro in threads:
        coro.close()

    # If we're exiting with an exception, raise it in the client.
    if exit_te:
        exit_te.reraise()
# Sockets and their associated events.
class SocketClosedError(Exception):
    """Raised when an operation is attempted on a closed socket wrapper."""
class Listener(object):
    """A socket wrapper object for listening sockets.
    """
    def __init__(self, host, port):
        """Create a listening socket on the given hostname and port.
        """
        self._closed = False
        self.host = host
        self.port = port
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Allow quick rebinding of the address after a previous
        # listener exits.
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.bind((host, port))
        self.sock.listen(5)

    def accept(self):
        """An event that waits for a connection on the listening socket.
        When a connection is made, the event returns a Connection
        object.
        """
        if self._closed:
            raise SocketClosedError()
        return AcceptEvent(self)

    def close(self):
        """Immediately close the listening socket. (Not an event.)
        """
        self._closed = True
        self.sock.close()
class Connection(object):
    """A socket wrapper object for connected sockets.
    """
    def __init__(self, sock, addr):
        self.sock = sock
        self.addr = addr
        # Data received from the socket but not yet consumed.
        self._buf = b''
        self._closed = False

    def close(self):
        """Close the connection."""
        self._closed = True
        self.sock.close()

    def recv(self, size):
        """Read at most size bytes of data from the socket."""
        if self._closed:
            raise SocketClosedError()

        if self._buf:
            # We already have data read previously.
            out = self._buf[:size]
            self._buf = self._buf[size:]
            return ValueEvent(out)
        else:
            return ReceiveEvent(self, size)

    def send(self, data):
        """Sends data on the socket, returning the number of bytes
        successfully sent.
        """
        if self._closed:
            raise SocketClosedError()
        return SendEvent(self, data)

    def sendall(self, data):
        """Send all of data on the socket."""
        if self._closed:
            raise SocketClosedError()
        return SendEvent(self, data, True)

    def readline(self, terminator=b"\n", bufsize=1024):
        """Reads a line (delimited by terminator) from the socket.

        This is a coroutine: it buffers received data and yields the
        completed line via a ReturnEvent.
        """
        if self._closed:
            raise SocketClosedError()

        while True:
            if terminator in self._buf:
                line, self._buf = self._buf.split(terminator, 1)
                line += terminator
                yield ReturnEvent(line)
                break
            data = yield ReceiveEvent(self, bufsize)
            if data:
                self._buf += data
            else:
                # Connection closed: return whatever remains buffered.
                line = self._buf
                self._buf = b''
                yield ReturnEvent(line)
                break
class AcceptEvent(WaitableEvent):
    """An event for Listener objects (listening sockets) that suspends
    execution until the socket gets a connection.
    """
    def __init__(self, listener):
        self.listener = listener

    def waitables(self):
        # Readability on a listening socket means a pending connection.
        return (self.listener.sock,), (), ()

    def fire(self):
        sock, addr = self.listener.sock.accept()
        return Connection(sock, addr)
class ReceiveEvent(WaitableEvent):
    """An event for Connection objects (connected sockets) for
    asynchronously reading data.
    """
    def __init__(self, conn, bufsize):
        self.conn = conn
        self.bufsize = bufsize

    def waitables(self):
        return (self.conn.sock,), (), ()

    def fire(self):
        return self.conn.sock.recv(self.bufsize)
class SendEvent(WaitableEvent):
    """An event for Connection objects (connected sockets) for
    asynchronously writing data.
    """
    def __init__(self, conn, data, sendall=False):
        self.conn = conn
        self.data = data
        # When True, use socket.sendall() instead of a single send().
        self.sendall = sendall

    def waitables(self):
        return (), (self.conn.sock,), ()

    def fire(self):
        if self.sendall:
            return self.conn.sock.sendall(self.data)
        else:
            return self.conn.sock.send(self.data)
# Public interface for threads; each returns an event object that
# can immediately be "yield"ed.
def null():
    """Event: yield to the scheduler without doing anything special.
    """
    return ValueEvent(None)
def spawn(coro):
    """Event: add another coroutine to the scheduler. Both the parent
    and child coroutines run concurrently.

    Raises ValueError if ``coro`` is not a generator (coroutine).
    """
    if isinstance(coro, types.GeneratorType):
        return SpawnEvent(coro)
    raise ValueError(u'%s is not a coroutine' % coro)
def call(coro):
    """Event: delegate to another coroutine. The current coroutine
    is resumed once the sub-coroutine finishes. If the sub-coroutine
    returns a value using end(), then this event returns that value.

    Raises ValueError if ``coro`` is not a generator (coroutine).
    """
    if isinstance(coro, types.GeneratorType):
        return DelegationEvent(coro)
    raise ValueError(u'%s is not a coroutine' % coro)
def end(value=None):
    """Event: ends the coroutine and returns a value to its
    delegator.
    """
    return ReturnEvent(value)
def read(fd, bufsize=None):
    """Event: read from a file descriptor asynchronously.

    With no ``bufsize``, delegate to a sub-coroutine that reads until
    EOF and returns everything read.
    """
    if bufsize is not None:
        return ReadEvent(fd, bufsize)

    # Read all.
    def reader():
        chunks = []
        while True:
            chunk = yield read(fd, 1024)
            if not chunk:
                break
            chunks.append(chunk)
        yield ReturnEvent(''.join(chunks))
    return DelegationEvent(reader())
def write(fd, data):
    """Event: write to a file descriptor asynchronously."""
    return WriteEvent(fd, data)
def connect(host, port):
    """Event: connect to a network address and return a Connection
    object for communicating on the socket.
    """
    # NOTE(review): the connection itself is established synchronously
    # here (socket.create_connection blocks), not via the scheduler --
    # confirm this is acceptable for callers.
    addr = (host, port)
    sock = socket.create_connection(addr)
    return ValueEvent(Connection(sock, addr))
def sleep(duration):
    """Event: suspend the thread for ``duration`` seconds.
    """
    return SleepEvent(duration)
def join(coro):
    """Suspend the thread until another, previously `spawn`ed thread
    completes.
    """
    return JoinEvent(coro)
def kill(coro):
    """Halt the execution of a different `spawn`ed thread.
    """
    return KillEvent(coro)
# Convenience function for running socket servers.
def server(host, port, func):
    """A coroutine that runs a network server. Host and port specify the
    listening address. func should be a coroutine that takes a single
    parameter, a Connection object. The coroutine is invoked for every
    incoming connection on the listening socket.
    """
    def handler(conn):
        # Wrap the client coroutine so the connection is always closed.
        try:
            yield func(conn)
        finally:
            conn.close()

    listener = Listener(host, port)
    try:
        while True:
            conn = yield listener.accept()
            yield spawn(handler(conn))
    except KeyboardInterrupt:
        pass
    finally:
        listener.close()

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
from __future__ import division, absolute_import, print_function
from enum import Enum
class OrderedEnum(Enum):
    """An Enum subclass whose members support the ordering operators,
    compared by their values. Comparisons against members of other
    classes return NotImplemented.
    """
    def __ge__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value >= other.value

    def __gt__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value > other.value

    def __le__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value <= other.value

    def __lt__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value < other.value

View file

@ -0,0 +1,623 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""This module implements a string formatter based on the standard PEP
292 string.Template class extended with function calls. Variables, as
with string.Template, are indicated with $ and functions are delimited
with %.
This module assumes that everything is Unicode: the template and the
substitution values. Bytestrings are not supported. Also, the templates
always behave like the ``safe_substitute`` method in the standard
library: unknown symbols are left intact.
This is sort of like a tiny, horrible degeneration of a real templating
engine like Jinja2 or Mustache.
"""
from __future__ import division, absolute_import, print_function
import re
import ast
import dis
import types
import sys
import six
# Characters with special meaning in template strings.
SYMBOL_DELIM = u'$'
FUNC_DELIM = u'%'
GROUP_OPEN = u'{'
GROUP_CLOSE = u'}'
ARG_SEP = u','
ESCAPE_CHAR = u'$'

# Prefixes used to namespace identifiers in generated code.
VARIABLE_PREFIX = '__var_'
FUNCTION_PREFIX = '__func_'
class Environment(object):
    """Contains the values and functions to be substituted into a
    template.
    """
    def __init__(self, values, functions):
        # Mapping of symbol names to substitution values.
        self.values = values
        # Mapping of function names to callables.
        self.functions = functions
# Code generation helpers.
def ex_lvalue(name):
    """A variable *store* expression (an assignment target).

    (The docstring previously said "load" -- it was swapped with
    ex_rvalue's.)
    """
    return ast.Name(name, ast.Store())
def ex_rvalue(name):
    """A variable *load* expression (a read of a name's value).

    (The docstring previously said "store" -- it was swapped with
    ex_lvalue's.)
    """
    return ast.Name(name, ast.Load())
def ex_literal(val):
    """An int, float, long, bool, string, or None literal with the given
    value.
    """
    if val is None:
        return ast.Name('None', ast.Load())
    elif isinstance(val, bool):
        # BUG FIX: bool must be tested before the integer types because
        # ``bool`` is a subclass of ``int``, so True/False previously
        # fell into the integer branch; also ``bytes(True)`` is
        # b'\x01' on Python 3, so str(val) is used to name the builtin.
        return ast.Name(str(val), ast.Load())
    elif isinstance(val, six.integer_types):
        return ast.Num(val)
    elif isinstance(val, six.string_types):
        return ast.Str(val)
    # NOTE(review): floats are listed in the docstring but fall through
    # to TypeError here -- confirm whether float support is required.
    raise TypeError(u'no literal for {0}'.format(type(val)))
def ex_varassign(name, expr):
    """Assign an expression into a single variable. The expression may
    either be an `ast.expr` object or a value to be used as a literal.
    """
    rhs = expr if isinstance(expr, ast.expr) else ex_literal(expr)
    return ast.Assign([ex_lvalue(name)], rhs)
def ex_call(func, args):
    """A function-call expression with only positional parameters. The
    function may be an expression or the name of a function. Each
    argument may be an expression or a value to be used as a literal.
    """
    if isinstance(func, six.string_types):
        func = ex_rvalue(func)

    # Coerce plain values into literal AST expressions.
    args = [a if isinstance(a, ast.expr) else ex_literal(a) for a in args]

    if sys.version_info[:2] < (3, 5):
        # Older signature: ast.Call(func, args, keywords, starargs, kwargs).
        return ast.Call(func, args, [], None, None)
    return ast.Call(func, args, [])
def compile_func(arg_names, statements, name='_the_func', debug=False):
    """Compile a list of statements as the body of a function and return
    the resulting Python function. If `debug`, then print out the
    bytecode of the compiled function.
    """
    if six.PY2:
        # Python 2: identifiers must be byte strings and the vararg /
        # kwarg slots are plain fields of ast.arguments.
        func_def = ast.FunctionDef(
            name=name.encode('utf-8'),
            args=ast.arguments(
                args=[ast.Name(n, ast.Param()) for n in arg_names],
                vararg=None,
                kwarg=None,
                defaults=[ex_literal(None) for _ in arg_names],
            ),
            body=statements,
            decorator_list=[],
        )
    else:
        func_def = ast.FunctionDef(
            name=name,
            args=ast.arguments(
                args=[ast.arg(arg=n, annotation=None) for n in arg_names],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[ex_literal(None) for _ in arg_names],
            ),
            body=statements,
            decorator_list=[],
        )

    # NOTE(review): ast.Module([...]) with one argument lacks the
    # type_ignores field required on Python 3.8+ -- confirm the
    # supported interpreter range.
    mod = ast.Module([func_def])
    ast.fix_missing_locations(mod)

    prog = compile(mod, '<generated>', 'exec')

    # Debug: show bytecode.
    if debug:
        dis.dis(prog)
        for const in prog.co_consts:
            if isinstance(const, types.CodeType):
                dis.dis(const)

    # Execute the compiled module in a fresh namespace and pull out the
    # function object it defines.
    the_locals = {}
    exec(prog, {}, the_locals)
    return the_locals[name]
# AST nodes for the template language.
class Symbol(object):
    """A variable-substitution symbol in a template."""
    def __init__(self, ident, original):
        self.ident = ident
        self.original = original

    def __repr__(self):
        return u'Symbol(%s)' % repr(self.ident)

    def evaluate(self, env):
        """Evaluate the symbol in the environment, returning a Unicode
        string.
        """
        try:
            # Substitute for a value.
            return env.values[self.ident]
        except KeyError:
            # Unknown symbol: keep the original text.
            return self.original

    def translate(self):
        """Compile the variable lookup."""
        # Python 2 AST identifiers must be byte strings.
        ident = self.ident.encode('utf-8') if six.PY2 else self.ident
        expr = ex_rvalue(VARIABLE_PREFIX + ident)
        return [expr], set([ident]), set()
class Call(object):
    """A function call in a template."""
    def __init__(self, ident, args, original):
        self.ident = ident
        self.args = args
        self.original = original

    def __repr__(self):
        return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args),
                                      repr(self.original))

    def evaluate(self, env):
        """Evaluate the function call in the environment, returning a
        Unicode string.
        """
        if self.ident in env.functions:
            arg_vals = [expr.evaluate(env) for expr in self.args]
            try:
                out = env.functions[self.ident](*arg_vals)
            except Exception as exc:
                # Function raised exception! Maybe inlining the name of
                # the exception will help debug.
                return u'<%s>' % six.text_type(exc)
            return six.text_type(out)
        else:
            # Unknown function: keep the original text.
            return self.original

    def translate(self):
        """Compile the function call."""
        varnames = set()
        # Python 2 AST identifiers must be byte strings.
        if six.PY2:
            ident = self.ident.encode('utf-8')
        else:
            ident = self.ident
        funcnames = set([ident])

        arg_exprs = []
        for arg in self.args:
            subexprs, subvars, subfuncs = arg.translate()
            varnames.update(subvars)
            funcnames.update(subfuncs)

            # Create a subexpression that joins the result components of
            # the arguments.
            arg_exprs.append(ex_call(
                ast.Attribute(ex_literal(u''), 'join', ast.Load()),
                [ex_call(
                    'map',
                    [
                        ex_rvalue(six.text_type.__name__),
                        ast.List(subexprs, ast.Load()),
                    ]
                )],
            ))

        subexpr_call = ex_call(
            FUNCTION_PREFIX + ident,
            arg_exprs
        )
        return [subexpr_call], varnames, funcnames
class Expression(object):
    """Top-level template construct: contains a list of text blobs,
    Symbols, and Calls.
    """
    def __init__(self, parts):
        self.parts = parts

    def __repr__(self):
        return u'Expression(%s)' % (repr(self.parts))

    def evaluate(self, env):
        """Evaluate the entire expression in the environment, returning
        a Unicode string.
        """
        out = []
        for part in self.parts:
            if isinstance(part, six.string_types):
                # Literal text passes through unchanged.
                out.append(part)
            else:
                out.append(part.evaluate(env))
        return u''.join(map(six.text_type, out))

    def translate(self):
        """Compile the expression to a list of Python AST expressions, a
        set of variable names used, and a set of function names.
        """
        expressions = []
        varnames = set()
        funcnames = set()
        for part in self.parts:
            if isinstance(part, six.string_types):
                expressions.append(ex_literal(part))
            else:
                e, v, f = part.translate()
                expressions.extend(e)
                varnames.update(v)
                funcnames.update(f)
        return expressions, varnames, funcnames
# Parser.
class ParseError(Exception):
    """Raised when a template expression cannot be parsed."""
class Parser(object):
    """Parses a template expression string. Instantiate the class with
    the template source and call ``parse_expression``. The ``pos`` field
    will indicate the character after the expression finished and
    ``parts`` will contain a list of Unicode strings, Symbols, and Calls
    reflecting the concatenated portions of the expression.

    This is a terrible, ad-hoc parser implementation based on a
    left-to-right scan with no lexing step to speak of; it's probably
    both inefficient and incorrect. Maybe this should eventually be
    replaced with a real, accepted parsing technique (PEG, parser
    generator, etc.).
    """
    def __init__(self, string, in_argument=False):
        """ Create a new parser.
        :param string: the template expression source to parse
        :param in_argument: boolean that indicates the parser is to be
        used for parsing function arguments, ie. considering commas
        (`ARG_SEP`) a special character
        """
        self.string = string
        self.in_argument = in_argument
        # Index of the next character to consume.
        self.pos = 0
        # Accumulated output: Unicode strings, Symbols, and Calls.
        self.parts = []

    # Common parsing resources.
    # Characters that can start or end a structure; everything between
    # them is literal text.
    special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE,
                     ESCAPE_CHAR)
    # Matches the next special character *or* end of string, so a
    # search always succeeds.
    special_char_re = re.compile(r'[%s]|\Z' %
                                 u''.join(re.escape(c) for c in special_chars))
    escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP)
    terminator_chars = (GROUP_CLOSE,)

    def parse_expression(self):
        """Parse a template expression starting at ``pos``. Resulting
        components (Unicode strings, Symbols, and Calls) are added to
        the ``parts`` field, a list. The ``pos`` field is updated to be
        the next character after the expression.
        """
        # Append comma (ARG_SEP) to the list of special characters only when
        # parsing function arguments.
        extra_special_chars = ()
        special_char_re = self.special_char_re
        if self.in_argument:
            extra_special_chars = (ARG_SEP,)
            special_char_re = re.compile(
                r'[%s]|\Z' % u''.join(
                    re.escape(c) for c in
                    self.special_chars + extra_special_chars
                )
            )

        # Pending run of literal characters, joined lazily into one
        # string part when a structure begins or the scan ends.
        text_parts = []

        while self.pos < len(self.string):
            char = self.string[self.pos]

            if char not in self.special_chars + extra_special_chars:
                # A non-special character. Skip to the next special
                # character, treating the interstice as literal text.
                next_pos = (
                    special_char_re.search(
                        self.string[self.pos:]).start() + self.pos
                )
                text_parts.append(self.string[self.pos:next_pos])
                self.pos = next_pos
                continue

            if self.pos == len(self.string) - 1:
                # The last character can never begin a structure, so we
                # just interpret it as a literal character (unless it
                # terminates the expression, as with , and }).
                if char not in self.terminator_chars + extra_special_chars:
                    text_parts.append(char)
                    self.pos += 1
                break

            next_char = self.string[self.pos + 1]
            if char == ESCAPE_CHAR and next_char in (self.escapable_chars +
                                                     extra_special_chars):
                # An escaped special character ($$, $}, etc.). Note that
                # ${ is not an escape sequence: this is ambiguous with
                # the start of a symbol and it's not necessary (just
                # using { suffices in all cases).
                text_parts.append(next_char)
                self.pos += 2  # Skip the next character.
                continue

            # Shift all characters collected so far into a single string.
            if text_parts:
                self.parts.append(u''.join(text_parts))
                text_parts = []

            if char == SYMBOL_DELIM:
                # Parse a symbol.
                self.parse_symbol()
            elif char == FUNC_DELIM:
                # Parse a function call.
                self.parse_call()
            elif char in self.terminator_chars + extra_special_chars:
                # Template terminated.
                break
            elif char == GROUP_OPEN:
                # Start of a group has no meaning here; just pass
                # through the character.
                text_parts.append(char)
                self.pos += 1
            else:
                assert False

        # If any parsed characters remain, shift them into a string.
        if text_parts:
            self.parts.append(u''.join(text_parts))

    def parse_symbol(self):
        """Parse a variable reference (like ``$foo`` or ``${foo}``)
        starting at ``pos``. Possibly appends a Symbol object (or,
        failing that, text) to the ``parts`` field and updates ``pos``.
        The character at ``pos`` must, as a precondition, be ``$``.
        """
        assert self.pos < len(self.string)
        assert self.string[self.pos] == SYMBOL_DELIM

        if self.pos == len(self.string) - 1:
            # Last character. A trailing $ is literal text.
            self.parts.append(SYMBOL_DELIM)
            self.pos += 1
            return

        next_char = self.string[self.pos + 1]
        start_pos = self.pos
        self.pos += 1

        if next_char == GROUP_OPEN:
            # A symbol like ${this}.
            self.pos += 1  # Skip opening.
            closer = self.string.find(GROUP_CLOSE, self.pos)
            if closer == -1 or closer == self.pos:
                # No closing brace found or identifier is empty.
                # Emit everything consumed so far as literal text.
                self.parts.append(self.string[start_pos:self.pos])
            else:
                # Closer found.
                ident = self.string[self.pos:closer]
                self.pos = closer + 1
                self.parts.append(Symbol(ident,
                                         self.string[start_pos:self.pos]))
        else:
            # A bare-word symbol.
            ident = self._parse_ident()
            if ident:
                # Found a real symbol.
                self.parts.append(Symbol(ident,
                                         self.string[start_pos:self.pos]))
            else:
                # A standalone $.
                self.parts.append(SYMBOL_DELIM)

    def parse_call(self):
        """Parse a function call (like ``%foo{bar,baz}``) starting at
        ``pos``. Possibly appends a Call object to ``parts`` and update
        ``pos``. The character at ``pos`` must be ``%``.
        """
        assert self.pos < len(self.string)
        assert self.string[self.pos] == FUNC_DELIM

        start_pos = self.pos
        self.pos += 1

        ident = self._parse_ident()
        if not ident:
            # No function name. A standalone % is literal text.
            self.parts.append(FUNC_DELIM)
            return

        if self.pos >= len(self.string):
            # Identifier terminates string; no argument list follows.
            self.parts.append(self.string[start_pos:self.pos])
            return

        if self.string[self.pos] != GROUP_OPEN:
            # Argument list not opened.
            self.parts.append(self.string[start_pos:self.pos])
            return

        # Skip past opening brace and try to parse an argument list.
        self.pos += 1
        args = self.parse_argument_list()
        if self.pos >= len(self.string) or \
                self.string[self.pos] != GROUP_CLOSE:
            # Arguments unclosed. Fall back to literal text.
            self.parts.append(self.string[start_pos:self.pos])
            return

        self.pos += 1  # Move past closing brace.
        self.parts.append(Call(ident, args, self.string[start_pos:self.pos]))

    def parse_argument_list(self):
        """Parse a list of arguments starting at ``pos``, returning a
        list of Expression objects. Does not modify ``parts``. Should
        leave ``pos`` pointing to a } character or the end of the
        string.
        """
        # Try to parse a subexpression in a subparser.
        expressions = []

        while self.pos < len(self.string):
            # Each argument is parsed by a fresh sub-parser that treats
            # ARG_SEP as special (in_argument=True).
            subparser = Parser(self.string[self.pos:], in_argument=True)
            subparser.parse_expression()

            # Extract and advance past the parsed expression.
            expressions.append(Expression(subparser.parts))
            self.pos += subparser.pos

            if self.pos >= len(self.string) or \
                    self.string[self.pos] == GROUP_CLOSE:
                # Argument list terminated by EOF or closing brace.
                break

            # Only other way to terminate an expression is with ,.
            # Continue to the next argument.
            assert self.string[self.pos] == ARG_SEP
            self.pos += 1

        return expressions

    def _parse_ident(self):
        """Parse an identifier and return it (possibly an empty string).
        Updates ``pos`` past the consumed word characters.
        """
        remainder = self.string[self.pos:]
        ident = re.match(r'\w*', remainder).group(0)
        self.pos += len(ident)
        return ident
def _parse(template):
    """Parse *template* into a top-level Expression.

    Any trailing text that the parser could not consume is kept as a
    literal part rather than discarded.
    """
    parser = Parser(template)
    parser.parse_expression()

    pieces = parser.parts
    leftover = parser.string[parser.pos:]
    if leftover:
        pieces.append(leftover)
    return Expression(pieces)
# External interface.
class Template(object):
    """A string template, including text, Symbols, and Calls.

    The template is parsed once at construction time and also compiled
    to a Python function for fast repeated substitution.
    """
    def __init__(self, template):
        self.expr = _parse(template)
        self.original = template
        self.compiled = self.translate()

    def __eq__(self, other):
        # Templates compare equal when their source strings are equal.
        # Return NotImplemented for foreign types (instead of raising
        # AttributeError) so comparisons degrade gracefully.
        if not isinstance(other, Template):
            return NotImplemented
        return self.original == other.original

    def interpret(self, values=None, functions=None):
        """Like `substitute`, but forces the interpreter (rather than
        the compiled version) to be used. The interpreter includes
        exception-handling code for missing variables and buggy template
        functions but is much slower.

        :param values: mapping of variable names to substitution values.
        :param functions: mapping of function names to callables.
        """
        # Avoid mutable default arguments: use fresh dicts per call.
        values = {} if values is None else values
        functions = {} if functions is None else functions
        return self.expr.evaluate(Environment(values, functions))

    def substitute(self, values=None, functions=None):
        """Evaluate the template given the values and functions.

        The fast compiled form is tried first; any exception it raises
        (e.g. for a missing variable) triggers a fallback to the safe
        but slow interpreter.
        """
        values = {} if values is None else values
        functions = {} if functions is None else functions
        try:
            res = self.compiled(values, functions)
        except Exception:  # Handle any exceptions thrown by compiled version.
            res = self.interpret(values, functions)
        return res

    def translate(self):
        """Compile the template to a Python function and return it."""
        expressions, varnames, funcnames = self.expr.translate()

        # The compiled function takes one (mangled) argument per
        # variable and per function the template references.
        argnames = []
        for varname in varnames:
            argnames.append(VARIABLE_PREFIX + varname)
        for funcname in funcnames:
            argnames.append(FUNCTION_PREFIX + funcname)

        func = compile_func(
            argnames,
            [ast.Return(ast.List(expressions, ast.Load()))],
        )

        def wrapper_func(values=None, functions=None):
            # Map caller-supplied values/functions onto the compiled
            # function's mangled argument names.
            values = {} if values is None else values
            functions = {} if functions is None else functions
            args = {}
            for varname in varnames:
                args[VARIABLE_PREFIX + varname] = values[varname]
            for funcname in funcnames:
                args[FUNCTION_PREFIX + funcname] = functions[funcname]
            parts = func(**args)
            return u''.join(parts)

        return wrapper_func
# Performance tests.
if __name__ == '__main__':
    import timeit
    # One template exercising literal text, a variable, and a function
    # call so both code paths do comparable work.
    _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar')
    _vars = {'bar': 'qux'}
    _funcs = {'baz': six.text_type.upper}
    # Time the pure interpreter...
    interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)',
                                'from __main__ import _tmpl, _vars, _funcs',
                                number=10000)
    print(interp_time)
    # ...against substitute(), which uses the compiled function.
    comp_time = timeit.timeit('_tmpl.substitute(_vars, _funcs)',
                              'from __main__ import _tmpl, _vars, _funcs',
                              number=10000)
    print(comp_time)
    print(u'Speedup:', interp_time / comp_time)

View file

@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Simple library to work out if a file is hidden on different platforms."""
from __future__ import division, absolute_import, print_function
import os
import stat
import ctypes
import sys
import beets.util
def _is_hidden_osx(path):
    """Return whether or not a file is hidden on OS X.

    Reads the ``UF_HIDDEN`` bit from ``st_flags`` via ``os.lstat``; if
    either ``st_flags`` or ``stat.UF_HIDDEN`` is unavailable on this
    platform/build, the file is reported as not hidden.
    """
    info = os.lstat(beets.util.syspath(path))
    if not hasattr(info, 'st_flags') or not hasattr(stat, 'UF_HIDDEN'):
        return False
    return bool(info.st_flags & stat.UF_HIDDEN)
def _is_hidden_win(path):
    """Return whether or not a file is hidden on Windows.

    This uses GetFileAttributes to work out if a file has the "hidden"
    flag (FILE_ATTRIBUTE_HIDDEN).
    """
    # FILE_ATTRIBUTE_HIDDEN = 2 (0x2) from GetFileAttributes documentation.
    file_attribute_hidden = 2

    # Retrieve the attributes for the file.
    attrs = ctypes.windll.kernel32.GetFileAttributesW(beets.util.syspath(path))

    # A negative result means the call failed (invalid attributes), so
    # ensure the attributes are valid before testing the mask.
    return attrs >= 0 and attrs & file_attribute_hidden
def _is_hidden_dot(path):
"""Return whether or not a file starts with a dot.
Files starting with a dot are seen as "hidden" files on Unix-based OSes.
"""
return os.path.basename(path).startswith(b'.')
def is_hidden(path):
    """Return whether or not a file is hidden. `path` should be a
    bytestring filename.

    The check is platform specific: on OS X both the stat "hidden" flag
    and the leading-dot convention are consulted; on Windows the file
    attribute flag is used; everywhere else (e.g. Linux) only the
    leading-dot convention applies.
    """
    if sys.platform == 'darwin':
        return _is_hidden_osx(path) or _is_hidden_dot(path)
    if sys.platform == 'win32':
        return _is_hidden_win(path)
    return _is_hidden_dot(path)


__all__ = ['is_hidden']

View file

@ -0,0 +1,530 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Simple but robust implementation of generator/coroutine-based
pipelines in Python. The pipelines may be run either sequentially
(single-threaded) or in parallel (one thread per pipeline stage).
This implementation supports pipeline bubbles (indications that the
processing for a certain item should abort). To use them, yield the
BUBBLE constant from any stage coroutine except the last.
In the parallel case, the implementation transparently handles thread
shutdown when the processing is complete and when a stage raises an
exception. KeyboardInterrupts (^C) are also handled.
When running a parallel pipeline, it is also possible to use
multiple coroutines for the same pipeline stage; this lets you speed
up a bottleneck stage by dividing its work among multiple threads.
To do so, pass an iterable of coroutines to the Pipeline constructor
in place of any single coroutine.
"""
from __future__ import division, absolute_import, print_function
from six.moves import queue
from threading import Thread, Lock
import sys
import six
# Sentinel yielded by a stage to indicate that the current item should
# be dropped from the pipeline (see _allmsgs).
BUBBLE = '__PIPELINE_BUBBLE__'

# Sentinel placed on an inter-stage queue to tell consumers to shut down.
POISON = '__PIPELINE_POISON__'

# Default capacity of the inter-stage queues used by run_parallel.
DEFAULT_QUEUE_SIZE = 16
def _invalidate_queue(q, val=None, sync=True):
"""Breaks a Queue such that it never blocks, always has size 1,
and has no maximum size. get()ing from the queue returns `val`,
which defaults to None. `sync` controls whether a lock is
required (because it's not reentrant!).
"""
def _qsize(len=len):
return 1
def _put(item):
pass
def _get():
return val
if sync:
q.mutex.acquire()
try:
# Originally, we set `maxsize` to 0 here, which is supposed to mean
# an unlimited queue size. However, there is a race condition since
# Python 3.2 when this attribute is changed while another thread is
# waiting in put()/get() due to a full/empty queue.
# Setting it to 2 is still hacky because Python does not give any
# guarantee what happens if Queue methods/attributes are overwritten
# when it is already in use. However, because of our dummy _put()
# and _get() methods, it provides a workaround to let the queue appear
# to be never empty or full.
# See issue https://github.com/beetbox/beets/issues/2078
q.maxsize = 2
q._qsize = _qsize
q._put = _put
q._get = _get
q.not_empty.notifyAll()
q.not_full.notifyAll()
finally:
if sync:
q.mutex.release()
class CountedQueue(queue.Queue):
    """A queue that keeps track of the number of threads that are
    still feeding into it. The queue is poisoned when all threads are
    finished with the queue.
    """
    def __init__(self, maxsize=0):
        queue.Queue.__init__(self, maxsize)
        # Number of producer threads currently attached via acquire().
        self.nthreads = 0
        # True once the last producer has released the queue.
        self.poisoned = False

    def acquire(self):
        """Indicate that a thread will start putting into this queue.
        Should not be called after the queue is already poisoned.
        """
        with self.mutex:
            assert not self.poisoned
            assert self.nthreads >= 0
            self.nthreads += 1

    def release(self):
        """Indicate that a thread that was putting into this queue has
        exited. If this is the last thread using the queue, the queue
        is poisoned.
        """
        with self.mutex:
            self.nthreads -= 1
            assert self.nthreads >= 0
            if self.nthreads == 0:
                # All threads are done adding to this queue. Poison it
                # when it becomes empty.
                self.poisoned = True

                # Replacement _get invalidates when no items remain.
                _old_get = self._get

                def _get():
                    out = _old_get()
                    if not self.queue:
                        # sync=False: we already hold self.mutex here.
                        _invalidate_queue(self, POISON, False)
                    return out

                if self.queue:
                    # Items remain. Deliver them first; the patched
                    # _get poisons the queue once it drains.
                    self._get = _get
                else:
                    # No items. Invalidate immediately (sync=False:
                    # self.mutex is already held).
                    _invalidate_queue(self, POISON, False)
class MultiMessage(object):
    """A message yielded by a pipeline stage encapsulating multiple
    values to be sent to the next stage.
    """
    def __init__(self, messages):
        self.messages = messages

    def __repr__(self):
        # Show the wrapped payload to aid debugging.
        return '{0}({1!r})'.format(type(self).__name__, self.messages)
def multiple(messages):
    """Wrap several values so a stage can emit them all at once.

    Yield ``multiple([message, ...])`` from a pipeline stage to send
    every enclosed message on to the next pipeline stage.
    """
    return MultiMessage(messages)
def stage(func):
    """Decorate a function to become a simple stage.

    >>> @stage
    ... def add(n, i):
    ...     return i + n
    >>> pipe = Pipeline([
    ...     iter([1, 2, 3]),
    ...     add(2),
    ... ])
    >>> list(pipe.pull())
    [3, 4, 5]
    """
    def coro(*args):
        # Yield the previous result, receive the next item, and apply
        # `func` with the stage's fixed arguments prepended.
        result = None
        while True:
            received = yield result
            result = func(*(args + (received,)))
    return coro
def mutator_stage(func):
    """Decorate a function that manipulates items in a coroutine to
    become a simple stage.

    >>> @mutator_stage
    ... def setkey(key, item):
    ...     item[key] = True
    >>> pipe = Pipeline([
    ...     iter([{'x': False}, {'a': False}]),
    ...     setkey('x'),
    ... ])
    >>> list(pipe.pull())
    [{'x': True}, {'a': False, 'x': True}]
    """
    def coro(*args):
        # Unlike `stage`, the received item itself is passed along
        # unchanged; `func` is called only for its side effects.
        item = None
        while True:
            item = yield item
            func(*(args + (item,)))
    return coro
def _allmsgs(obj):
    """Returns a list of all the messages encapsulated in obj. If obj
    is a MultiMessage, returns its enclosed messages. If obj is BUBBLE,
    returns an empty list. Otherwise, returns a list containing obj.
    """
    if isinstance(obj, MultiMessage):
        return obj.messages
    if obj == BUBBLE:
        # Bubbles are dropped entirely.
        return []
    return [obj]
class PipelineThread(Thread):
    """Abstract base class for pipeline-stage threads."""
    def __init__(self, all_threads):
        super(PipelineThread, self).__init__()
        self.abort_lock = Lock()
        self.abort_flag = False
        self.all_threads = all_threads
        self.exc_info = None

    def abort(self):
        """Shut down the thread at the next chance possible.
        """
        with self.abort_lock:
            self.abort_flag = True

            # Ensure that we are not blocking on a queue read or write.
            for attr in ('in_queue', 'out_queue'):
                if hasattr(self, attr):
                    _invalidate_queue(getattr(self, attr), POISON)

    def abort_all(self, exc_info):
        """Abort all other threads in the system for an exception.
        """
        self.exc_info = exc_info
        for sibling in self.all_threads:
            sibling.abort()
class FirstPipelineThread(PipelineThread):
    """The thread running the first stage in a parallel pipeline setup.
    The coroutine should just be a generator.
    """
    def __init__(self, coro, out_queue, all_threads):
        super(FirstPipelineThread, self).__init__(all_threads)
        self.coro = coro

        self.out_queue = out_queue
        self.out_queue.acquire()
        # (abort_lock and abort_flag are initialized by the base class;
        # re-creating them here would leave a second, unused Lock.)

    def run(self):
        """Drain the generator and feed its messages to out_queue,
        checking the abort flag around every blocking operation.
        """
        try:
            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the value from the generator.
                try:
                    msg = next(self.coro)
                except StopIteration:
                    break

                # Send messages to the next stage.
                for msg in _allmsgs(msg):
                    with self.abort_lock:
                        if self.abort_flag:
                            return
                    self.out_queue.put(msg)
        except BaseException:
            self.abort_all(sys.exc_info())
            return

        # Generator finished; shut down the pipeline.
        self.out_queue.release()
class MiddlePipelineThread(PipelineThread):
    """A thread running any stage in the pipeline except the first or
    last.
    """
    def __init__(self, coro, in_queue, out_queue, all_threads):
        super(MiddlePipelineThread, self).__init__(all_threads)
        self.coro = coro
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.out_queue.acquire()

    def run(self):
        # Repeatedly pull a message, transform it via the coroutine, and
        # push the results, checking the abort flag around every
        # blocking operation.
        try:
            # Prime the coroutine.
            next(self.coro)

            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the message from the previous stage.
                msg = self.in_queue.get()
                if msg is POISON:
                    # Upstream producers are all done; shut down too.
                    break

                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Invoke the current stage.
                out = self.coro.send(msg)

                # Send messages to next stage.
                for msg in _allmsgs(out):
                    with self.abort_lock:
                        if self.abort_flag:
                            return
                    self.out_queue.put(msg)
        except BaseException:
            # Propagate the failure to every other pipeline thread.
            self.abort_all(sys.exc_info())
            return

        # Pipeline is shutting down normally.
        self.out_queue.release()
class LastPipelineThread(PipelineThread):
    """A thread running the last stage in a pipeline. The coroutine
    should yield nothing.
    """
    def __init__(self, coro, in_queue, all_threads):
        super(LastPipelineThread, self).__init__(all_threads)
        self.coro = coro
        self.in_queue = in_queue

    def run(self):
        """Consume messages from in_queue until poisoned or aborted."""
        try:
            # Prime the coroutine. This happens inside the try block
            # (matching MiddlePipelineThread) so that an exception
            # raised while priming aborts the whole pipeline instead of
            # silently killing only this thread.
            next(self.coro)

            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the message from the previous stage.
                msg = self.in_queue.get()
                if msg is POISON:
                    break

                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Send to consumer.
                self.coro.send(msg)
        except BaseException:
            self.abort_all(sys.exc_info())
            return
class Pipeline(object):
    """Represents a staged pattern of work. Each stage in the pipeline
    is a coroutine that receives messages from the previous stage and
    yields messages to be sent to the next stage.
    """
    def __init__(self, stages):
        """Makes a new pipeline from a list of coroutines. There must
        be at least two stages.

        Each element of ``stages`` may be either a single coroutine or
        a list/tuple of coroutines to run that stage in several threads
        under ``run_parallel``.
        """
        if len(stages) < 2:
            raise ValueError(u'pipeline must have at least two stages')
        self.stages = []
        for stage in stages:
            if isinstance(stage, (list, tuple)):
                self.stages.append(stage)
            else:
                # Default to one thread per stage.
                self.stages.append((stage,))

    def run_sequential(self):
        """Run the pipeline sequentially in the current thread. The
        stages are run one after the other. Only the first coroutine
        in each stage is used.
        """
        list(self.pull())

    def run_parallel(self, queue_size=DEFAULT_QUEUE_SIZE):
        """Run the pipeline in parallel using one thread per stage. The
        messages between the stages are stored in queues of the given
        size.
        """
        # One queue between each pair of adjacent stages.
        queue_count = len(self.stages) - 1
        queues = [CountedQueue(queue_size) for i in range(queue_count)]
        threads = []

        # Set up first stage.
        for coro in self.stages[0]:
            threads.append(FirstPipelineThread(coro, queues[0], threads))

        # Middle stages.
        for i in range(1, queue_count):
            for coro in self.stages[i]:
                threads.append(MiddlePipelineThread(
                    coro, queues[i - 1], queues[i], threads
                ))

        # Last stage.
        for coro in self.stages[-1]:
            threads.append(
                LastPipelineThread(coro, queues[-1], threads)
            )

        # Start threads.
        for thread in threads:
            thread.start()

        # Wait for termination. The final thread lasts the longest.
        try:
            # Using a timeout allows us to receive KeyboardInterrupt
            # exceptions during the join().
            while threads[-1].is_alive():
                threads[-1].join(1)

        except BaseException:
            # Stop all the threads immediately.
            for thread in threads:
                thread.abort()
            raise

        finally:
            # Make completely sure that all the threads have finished
            # before we return. They should already be either finished,
            # in normal operation, or aborted, in case of an exception.
            for thread in threads[:-1]:
                thread.join()

        # Re-raise any exception that a stage thread recorded.
        for thread in threads:
            exc_info = thread.exc_info
            if exc_info:
                # Make the exception appear as it was raised originally.
                six.reraise(exc_info[0], exc_info[1], exc_info[2])

    def pull(self):
        """Yield elements from the end of the pipeline. Runs the stages
        sequentially until the last yields some messages. Each of the
        messages is then yielded by the returned generator. If the
        pipeline has a consumer, that is, the last stage does not yield
        any messages, then pull will not yield any messages. Only the
        first coroutine in each stage is used.
        """
        coros = [stage[0] for stage in self.stages]

        # "Prime" the coroutines.
        for coro in coros[1:]:
            next(coro)

        # Begin the pipeline.
        for out in coros[0]:
            msgs = _allmsgs(out)
            # Feed each message through every remaining stage in turn.
            for coro in coros[1:]:
                next_msgs = []
                for msg in msgs:
                    out = coro.send(msg)
                    next_msgs.extend(_allmsgs(out))
                msgs = next_msgs
            for msg in msgs:
                yield msg
# Smoke test.
if __name__ == '__main__':
import time
# Test a normally-terminating pipeline both in sequence and
# in parallel.
def produce():
for i in range(5):
print(u'generating %i' % i)
time.sleep(1)
yield i
def work():
num = yield
while True:
print(u'processing %i' % num)
time.sleep(2)
num = yield num * 2
def consume():
while True:
num = yield
time.sleep(1)
print(u'received %i' % num)
ts_start = time.time()
Pipeline([produce(), work(), consume()]).run_sequential()
ts_seq = time.time()
Pipeline([produce(), work(), consume()]).run_parallel()
ts_par = time.time()
Pipeline([produce(), (work(), work()), consume()]).run_parallel()
ts_end = time.time()
print(u'Sequential time:', ts_seq - ts_start)
print(u'Parallel time:', ts_par - ts_seq)
print(u'Multiply-parallel time:', ts_end - ts_par)
print()
# Test a pipeline that raises an exception.
def exc_produce():
for i in range(10):
print(u'generating %i' % i)
time.sleep(1)
yield i
def exc_work():
num = yield
while True:
print(u'processing %i' % num)
time.sleep(3)
if num == 3:
raise Exception()
num = yield num * 2
def exc_consume():
while True:
num = yield
print(u'received %i' % num)
Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1)