GNXSOFT.COM

This commit is contained in:
Iliyan Angelov
2025-09-26 00:15:37 +03:00
commit fe26b7cca4
16323 changed files with 2011881 additions and 0 deletions

View File

@@ -0,0 +1,172 @@
"""Distributed Task Queue."""
# :copyright: (c) 2017-2026 Asif Saif Uddin, celery core and individual
# contributors, All rights reserved.
# :copyright: (c) 2015-2016 Ask Solem. All rights reserved.
# :copyright: (c) 2012-2014 GoPivotal, Inc., All rights reserved.
# :copyright: (c) 2009 - 2012 Ask Solem and individual contributors,
# All rights reserved.
# :license: BSD (3 Clause), see LICENSE for more details.
import os
import re
import sys
from collections import namedtuple
# Lazy loading
from . import local
SERIES = 'emerald-rush'
__version__ = '5.3.4'
__author__ = 'Ask Solem'
__contact__ = 'auvipy@gmail.com'
__homepage__ = 'https://docs.celeryq.dev/'
__docformat__ = 'restructuredtext'
__keywords__ = 'task job queue distributed messaging actor'
# -eof meta-
__all__ = (
'Celery', 'bugreport', 'shared_task', 'Task',
'current_app', 'current_task', 'maybe_signature',
'chain', 'chord', 'chunks', 'group', 'signature',
'xmap', 'xstarmap', 'uuid',
)
VERSION_BANNER = f'{__version__} ({SERIES})'

version_info_t = namedtuple('version_info_t', (
    'major', 'minor', 'micro', 'releaselevel', 'serial',
))

# bumpversion can only search for {current_version}
# so we have to parse the version here.
# Both separators are escaped so that only a literal "." is accepted
# between the numeric components; anything after the micro number is
# captured verbatim as the release level.
_temp = re.match(
    r'(\d+)\.(\d+)\.(\d+)(.+)?', __version__).groups()
VERSION = version_info = version_info_t(
    int(_temp[0]), int(_temp[1]), int(_temp[2]), _temp[3] or '', '')
# Neither the scratch tuple nor the ``re`` module is kept in the
# module namespace.
del _temp
del re
if os.environ.get('C_IMPDEBUG'):  # pragma: no cover
    import builtins

    def debug_import(name, globals=None, locals=None,
                     fromlist=None, level=0,
                     real_import=builtins.__import__):
        """Print which module performs each import, then delegate.

        The parameters follow the order of ``builtins.__import__``
        (``name, globals, locals, fromlist, level``) so that positional
        and keyword callers are both forwarded correctly.

        Arguments:
            name (str): Name of the module being imported.
            globals (dict): Globals of the importing code, if supplied.
            locals (dict): Locals of the importing code, if supplied.
            fromlist (Sequence[str]): Names requested by ``from ... import``.
            level (int): Relative import level; must not be negative.
            real_import (Callable): The import function delegated to.

        Returns:
            Whatever ``real_import`` returns.
        """
        # Without an explicit globals mapping, fall back to the globals of
        # the calling frame.  The attribute name is spelled backwards and
        # reversed at runtime.
        # NOTE(review): presumably to hide the private ``sys`` API from
        # static checkers -- confirm.
        glob = globals or getattr(sys, 'emarfteg_'[::-1])(1).f_globals
        importer_name = glob and glob.get('__name__') or 'unknown'
        print(f'-- {importer_name} imports {name}')
        return real_import(name, globals, locals, fromlist, level)
    builtins.__import__ = debug_import
# This is never executed, but tricks static analyzers (PyDev, PyCharm,
# pylint, etc.) into knowing the types of these symbols, and what
# they contain.
STATICA_HACK = True
globals()['kcah_acitats'[::-1].upper()] = False
if STATICA_HACK: # pragma: no cover
from celery._state import current_app, current_task
from celery.app import shared_task
from celery.app.base import Celery
from celery.app.task import Task
from celery.app.utils import bugreport
from celery.canvas import (chain, chord, chunks, group, maybe_signature, signature, subtask, xmap, # noqa
xstarmap)
from celery.utils import uuid
# Eventlet/gevent patching must happen before importing
# anything else, so these tools must be at top-level.
def _find_option_with_arg(argv, short_opts=None, long_opts=None):
    """Search argv for options specifying short and longopt alternatives.

    Long options may carry their value inline (``--pool=gevent``) or in the
    following argument (``--pool gevent``); short options always take the
    following argument.

    Arguments:
        argv (Sequence[str]): Command-line arguments to scan.
        short_opts (Sequence[str]): Accepted short spellings.
        long_opts (Sequence[str]): Accepted long spellings.

    Returns:
        str: value for option found

    Raises:
        KeyError: if option not found, or if it is the last argument and
            therefore has no value.
    """
    # Both alternatives are listed in the error, joined with "|".
    wanted = '|'.join(list(short_opts or ()) + list(long_opts or ()))

    for i, arg in enumerate(argv):
        if not arg.startswith('-'):
            continue
        matched = False
        if long_opts and arg.startswith('--'):
            name, sep, val = arg.partition('=')
            if name in long_opts:
                if sep:
                    return val
                matched = True
        if not matched and short_opts and arg in short_opts:
            matched = True
        if matched:
            # The value is the next argument; a trailing option has none.
            if i + 1 >= len(argv):
                raise KeyError(wanted)
            return argv[i + 1]
    raise KeyError(wanted)
def _patch_eventlet():
    """Apply eventlet's monkey patches and optionally enable block detection.

    When the ``EVENTLET_NOBLOCK`` environment variable holds a non-zero
    number, that number is passed twice to
    ``eventlet.debug.hub_blocking_detection``.
    """
    import eventlet.debug

    eventlet.monkey_patch()
    threshold = float(os.environ.get('EVENTLET_NOBLOCK', 0))
    if not threshold:
        return
    eventlet.debug.hub_blocking_detection(threshold, threshold)
def _patch_gevent():
    """Apply gevent's monkey patches via ``gevent.monkey.patch_all``."""
    # ``gevent.signal`` is imported but never referenced below.
    # NOTE(review): presumably imported for its import-time effect --
    # confirm before removing.
    import gevent.monkey
    import gevent.signal
    gevent.monkey.patch_all()
def maybe_patch_concurrency(argv=None, short_opts=None,
                            long_opts=None, patches=None):
    """Apply eventlet/gevent monkeypatches.

    The pool name is looked up in ``argv`` using the given short and long
    option spellings; when it names a known patcher that patcher runs
    immediately, so patching happens as early as possible.

    Arguments:
        argv (Sequence[str]): Arguments to scan, defaults to ``sys.argv``.
        short_opts (Sequence[str]): Short spellings, defaults to ``['-P']``.
        long_opts (Sequence[str]): Long spellings, defaults to
            ``['--pool']``.
        patches (Mapping[str, Callable]): Pool name to patch function,
            defaults to the eventlet and gevent patchers.
    """
    argv = argv or sys.argv
    short_opts = short_opts or ['-P']
    long_opts = long_opts or ['--pool']
    if not patches:
        patches = {'eventlet': _patch_eventlet, 'gevent': _patch_gevent}

    try:
        pool = _find_option_with_arg(argv, short_opts, long_opts)
    except KeyError:
        # No pool option on the command line: nothing to do.
        return

    try:
        patcher = patches[pool]
    except KeyError:
        pass
    else:
        patcher()

    # set up eventlet/gevent environments ASAP
    from celery import concurrency
    if pool in concurrency.get_available_pool_names():
        concurrency.get_implementation(pool)
# this just creates a new module, that imports stuff on first attribute
# access.  This makes the library faster to use.
#
# ``by_module`` lists, per source module, the names made available from
# this package; the remaining keyword arguments hand over the metadata and
# helper functions defined earlier in this file.
old_module, new_module = local.recreate_module(  # pragma: no cover
    __name__,
    by_module={
        'celery.app': ['Celery', 'bugreport', 'shared_task'],
        'celery.app.task': ['Task'],
        'celery._state': ['current_app', 'current_task'],
        'celery.canvas': [
            'Signature', 'chain', 'chord', 'chunks', 'group',
            'signature', 'maybe_signature', 'subtask',
            'xmap', 'xstarmap',
        ],
        'celery.utils': ['uuid'],
    },
    __package__='celery', __file__=__file__,
    __path__=__path__, __doc__=__doc__, __version__=__version__,
    __author__=__author__, __contact__=__contact__,
    __homepage__=__homepage__, __docformat__=__docformat__, local=local,
    VERSION=VERSION, SERIES=SERIES, VERSION_BANNER=VERSION_BANNER,
    version_info_t=version_info_t,
    version_info=version_info,
    maybe_patch_concurrency=maybe_patch_concurrency,
    _find_option_with_arg=_find_option_with_arg,
)

View File

@@ -0,0 +1,19 @@
"""Entry-point for the :program:`celery` umbrella command."""
import sys
from . import maybe_patch_concurrency
__all__ = ('main',)
def main() -> None:
    """Run the ``celery`` umbrella command and exit with its status.

    Concurrency patches are applied first, unless ``multi`` appears
    anywhere in ``sys.argv``.
    """
    patching_wanted = 'multi' not in sys.argv
    if patching_wanted:
        maybe_patch_concurrency()
    # Imported only after the optional patching above has run.
    from celery.bin.celery import main as _main
    exit_status = _main()
    sys.exit(exit_status)


if __name__ == '__main__':  # pragma: no cover
    main()

View File

@@ -0,0 +1,197 @@
"""Internal state.
This is an internal module containing thread state
like the ``current_app``, and ``current_task``.
This module shouldn't be used directly.
"""
import os
import sys
import threading
import weakref
from celery.local import Proxy
from celery.utils.threads import LocalStack
__all__ = (
'set_default_app', 'get_current_app', 'get_current_task',
'get_current_worker_task', 'current_app', 'current_task',
'connect_on_app_finalize',
)
#: Global default app used when no current app.
default_app = None
#: Function returning the app provided or the default app if none.
#:
#: The environment variable :envvar:`CELERY_TRACE_APP` is used to
#: trace app leaks. When enabled an exception is raised if there
#: is no active app.
app_or_default = None
#: Weak set of all app instances (weakrefs), mustn't be used directly.
_apps = weakref.WeakSet()
#: Global set of functions to call whenever a new app is finalized.
#: Shared tasks, and built-in tasks are created by adding callbacks here.
_on_app_finalizers = set()
_task_join_will_block = False
def connect_on_app_finalize(callback):
    """Connect callback to be called when any app is finalized.

    Arguments:
        callback (Callable): Added to the global set of finalize callbacks.

    Returns:
        Callable: The same ``callback``, so this also works as a decorator.
    """
    _on_app_finalizers.add(callback)
    return callback
def _announce_app_finalized(app):
    """Call every registered finalize callback with ``app``."""
    # Iterate over a copy so the registry may change while callbacks run.
    for notify in set(_on_app_finalizers):
        notify(app)
def _set_task_join_will_block(blocks):
    """Store ``blocks`` in the module-level ``_task_join_will_block`` flag."""
    global _task_join_will_block
    _task_join_will_block = blocks
def task_join_will_block():
    """Return the current value of the ``_task_join_will_block`` flag."""
    return _task_join_will_block
class _TLS(threading.local):
    """Thread-local holder for the current app."""

    #: Apps with the :attr:`~celery.app.base.BaseApp.set_as_current` attribute
    #: sets this, so it will always contain the last instantiated app,
    #: and is the default app returned by :func:`app_or_default`.
    current_app = None
_tls = _TLS()
_task_stack = LocalStack()
#: Function used to push a task to the thread local stack
#: keeping track of the currently executing task.
#: You must remember to pop the task after.
push_current_task = _task_stack.push
#: Function used to pop a task from the thread local stack
#: keeping track of the currently executing task.
pop_current_task = _task_stack.pop
def set_default_app(app):
    """Set default app.

    Arguments:
        app: Stored in the module-level ``default_app`` variable.
    """
    global default_app
    default_app = app
def _get_current_app():
    """Return the thread's current app, falling back to the default app.

    The default app is created on first use when none has been set; its
    loader name comes from ``CELERY_LOADER`` and defaults to ``'default'``.
    """
    if default_app is None:
        #: creates the global fallback app instance.
        from celery.app.base import Celery
        loader_name = os.environ.get('CELERY_LOADER') or 'default'
        fallback = Celery(
            'default', fixups=[], set_as_current=False, loader=loader_name,
        )
        set_default_app(fallback)
    return _tls.current_app or default_app
def _set_current_app(app):
    """Make ``app`` the current app for the calling thread."""
    _tls.current_app = app
# ``get_current_app`` is chosen once, at import time, from two debugging
# environment variables; without them it is simply ``_get_current_app``.
if os.environ.get('C_STRICT_APP'):  # pragma: no cover
    def get_current_app():
        """Raise :exc:`RuntimeError` on every use of the current app."""
        raise RuntimeError('USES CURRENT APP')
elif os.environ.get('C_WARN_APP'):  # pragma: no cover
    def get_current_app():
        """Return the current app after reporting the access on stderr."""
        import traceback
        # Print a marker and the caller's stack before returning the app.
        print('-- USES CURRENT_APP', file=sys.stderr)  # +
        traceback.print_stack(file=sys.stderr)
        return _get_current_app()
else:
    get_current_app = _get_current_app
def get_current_task():
    """Currently executing task.

    Returns:
        The top of the task stack (``_task_stack.top``).
    """
    return _task_stack.top
def get_current_worker_task():
    """Currently executing task, that was applied by the worker.

    The task stack is searched from the most recent entry backwards for
    the first task whose request was not called directly.  This tells the
    task executed by the worker apart from any task called within a task
    (using ``task.__call__`` or ``task.apply``).

    Returns:
        The matching task, or None when there is no such task.
    """
    worker_tasks = (
        task for task in reversed(_task_stack.stack)
        if not task.request.called_directly
    )
    return next(worker_tasks, None)
#: Proxy to current app.
current_app = Proxy(get_current_app)
#: Proxy to current task.
current_task = Proxy(get_current_task)
def _register_app(app):
    """Add ``app`` to the weak set of known app instances."""
    _apps.add(app)
def _deregister_app(app):
    """Remove ``app`` from the set of known apps, if it is present."""
    _apps.discard(app)
def _get_active_apps():
    """Return the set of known app instances (the set itself, not a copy)."""
    return _apps
def _app_or_default(app=None):
    """Return ``app`` unchanged, or the current app when ``app`` is None."""
    return get_current_app() if app is None else app
def _app_or_default_trace(app=None):  # pragma: no cover
    """Tracing variant of ``_app_or_default``.

    An explicit ``app`` is returned unchanged.  Otherwise the fallback that
    is taken is printed together with a stack trace.

    Raises:
        Exception: if no current app is set and either ``billiard`` is not
            importable or the process is named ``'MainProcess'``.
    """
    from traceback import print_stack
    try:
        from billiard.process import current_process
    except ImportError:
        current_process = None

    if app is not None:
        return app

    if getattr(_tls, 'current_app', None):
        print('-- RETURNING TO CURRENT APP --')  # +
        print_stack()
        return _tls.current_app

    default_forbidden = (
        not current_process or current_process()._name == 'MainProcess'
    )
    if default_forbidden:
        raise Exception('DEFAULT APP')
    print('-- RETURNING TO DEFAULT APP --')  # +
    print_stack()
    return default_app
def enable_trace():
    """Enable tracing of app instances.

    Rebinds the module-level ``app_or_default`` to the tracing variant.
    """
    global app_or_default
    app_or_default = _app_or_default_trace
def disable_trace():
    """Disable tracing of app instances.

    Rebinds the module-level ``app_or_default`` to the plain variant.
    """
    global app_or_default
    app_or_default = _app_or_default
# Bind ``app_or_default`` at import time: the tracing variant when
# CELERY_TRACE_APP is set, the plain one otherwise.
if os.environ.get('CELERY_TRACE_APP'):  # pragma: no cover
    enable_trace()
else:
    disable_trace()

View File

@@ -0,0 +1,76 @@
"""Celery Application."""
from celery import _state
from celery._state import app_or_default, disable_trace, enable_trace, pop_current_task, push_current_task
from celery.local import Proxy
from .base import Celery
from .utils import AppPickler
__all__ = (
'Celery', 'AppPickler', 'app_or_default', 'default_app',
'bugreport', 'enable_trace', 'disable_trace', 'shared_task',
'push_current_task', 'pop_current_task',
)
#: Proxy always returning the app set as default.
default_app = Proxy(lambda: _state.default_app)
def bugreport(app=None):
    """Return information useful in bug reports.

    Arguments:
        app: App to report on; the current app is used when this is falsy.
    """
    target = app or _state.get_current_app()
    return target.bugreport()
def shared_task(*args, **kwargs):
    """Create shared task (decorator).

    Library authors can use this to define tasks that work with whichever
    app is in use.  It can be applied bare (``@shared_task``) or with
    task options (``@shared_task(name=...)``).

    Returns:
        ~celery.local.Proxy: A proxy that always takes the task from the
            current apps task registry.

    Example:
        >>> from celery import Celery, shared_task
        >>> @shared_task
        ... def add(x, y):
        ...     return x + y
        ...
        >>> app1 = Celery(broker='amqp://')
        >>> add.app is app1
        True
        >>> app2 = Celery(broker='redis://')
        >>> add.app is app2
        True
    """
    def create_shared_task(**options):

        def __inner(fun):
            name = options.get('name')

            def register_with(app):
                return app._task_from_fun(fun, **options)

            # Registered as a finalize callback so that apps finalized
            # later create their own copy of this task.
            _state.connect_on_app_finalize(register_with)

            # Apps that are already finalized get the task right away.
            for app in _state._get_active_apps():
                if not app.finalized:
                    continue
                with app._finalize_mutex:
                    app._task_from_fun(fun, **options)

            # The proxy resolves the task through the current app's
            # registry every time it is used.
            def task_by_cons():
                app = _state.get_current_app()
                registry = app.tasks
                return registry[
                    name or app.gen_task_name(fun.__name__, fun.__module__)
                ]
            return Proxy(task_by_cons)
        return __inner

    used_bare = len(args) == 1 and callable(args[0])
    if used_bare:
        return create_shared_task(**kwargs)(args[0])
    return create_shared_task(*args, **kwargs)

View File

@@ -0,0 +1,614 @@
"""Sending/Receiving Messages (Kombu integration)."""
import numbers
from collections import namedtuple
from collections.abc import Mapping
from datetime import timedelta
from weakref import WeakValueDictionary
from kombu import Connection, Consumer, Exchange, Producer, Queue, pools
from kombu.common import Broadcast
from kombu.utils.functional import maybe_list
from kombu.utils.objects import cached_property
from celery import signals
from celery.utils.nodenames import anon_nodename
from celery.utils.saferepr import saferepr
from celery.utils.text import indent as textindent
from celery.utils.time import maybe_make_aware
from . import routes as _routes
__all__ = ('AMQP', 'Queues', 'task_message')
#: earliest date supported by time.mktime.
INT_MIN = -2147483648
#: Human readable queue declaration.
QUEUE_FORMAT = """
.> {0.name:<16} exchange={0.exchange.name}({0.exchange.type}) \
key={0.routing_key}
"""
task_message = namedtuple('task_message',
('headers', 'properties', 'body', 'sent_event'))
def utf8dict(d, encoding='utf-8'):
    """Return a new dict in which every ``bytes`` key is decoded to ``str``.

    Arguments:
        d (Mapping): Source mapping; values are carried over unchanged.
        encoding (str): Codec used to decode ``bytes`` keys.
    """
    decoded = {}
    for key, value in d.items():
        if isinstance(key, bytes):
            key = key.decode(encoding)
        decoded[key] = value
    return decoded
class Queues(dict):
    """Queue name⇒ declaration mapping.

    Arguments:
        queues (Iterable): Initial list/tuple or dict of queues.
        create_missing (bool): By default any unknown queues will be
            added automatically, but if this flag is disabled the occurrence
            of unknown queues in `wanted` will raise :exc:`KeyError`.
        max_priority (int): Default x-max-priority for queues with none set.
    """

    #: If set, this is a subset of queues to consume from.
    #: The rest of the queues are then used for routing only.
    _consume_from = None

    def __init__(self, queues=None, default_exchange=None,
                 create_missing=True, autoexchange=None,
                 max_priority=None, default_routing_key=None):
        super().__init__()
        self.aliases = WeakValueDictionary()
        self.default_exchange = default_exchange
        self.default_routing_key = default_routing_key
        self.create_missing = create_missing
        if autoexchange is None:
            autoexchange = Exchange
        self.autoexchange = autoexchange
        self.max_priority = max_priority

        # A plain iterable of queues is keyed by each queue's name.
        if queues is not None and not isinstance(queues, Mapping):
            queues = {q.name: q for q in queues}
        for name, declaration in (queues or {}).items():
            if isinstance(declaration, Queue):
                self.add(declaration)
            else:
                self.add_compat(name, **declaration)

    def __getitem__(self, name):
        # Aliases are consulted before the regular queue names.
        try:
            return self.aliases[name]
        except KeyError:
            return super().__getitem__(name)

    def __setitem__(self, name, queue):
        lacks_exchange = self.default_exchange and not queue.exchange
        if lacks_exchange:
            queue.exchange = self.default_exchange
        super().__setitem__(name, queue)
        if queue.alias:
            self.aliases[queue.alias] = queue

    def __missing__(self, name):
        if not self.create_missing:
            raise KeyError(name)
        return self.add(self.new_missing(name))

    def add(self, queue, **kwargs):
        """Add new queue.

        ``queue`` is either a :class:`kombu.Queue` instance or a queue
        name.  For an instance the keyword arguments are ignored and all
        options come from the instance itself.

        Arguments:
            queue (kombu.Queue, str): Queue to add.
            exchange (kombu.Exchange, str):
                if queue is str, specifies exchange name.
            routing_key (str): if queue is str, specifies binding key.
            exchange_type (str): if queue is str, specifies type of exchange.
            **options (Any): Additional declaration options used when
                queue is a str.
        """
        if isinstance(queue, Queue):
            return self._add(queue)
        return self.add_compat(queue, **kwargs)

    def add_compat(self, name, **options):
        """Add a queue described by a name and a dict of options."""
        # docs used to use binding_key as routing key
        if 'routing_key' not in options:
            options['routing_key'] = options.get('binding_key')
        # With neither key given, the queue name is the routing key.
        if options['routing_key'] is None:
            options['routing_key'] = name
        return self._add(Queue.from_dict(name, **options))

    def _add(self, queue):
        """Fill in this mapping's defaults on ``queue`` and store it."""
        exchange = queue.exchange
        if exchange is None or exchange.name == '':
            queue.exchange = self.default_exchange
        if not queue.routing_key:
            queue.routing_key = self.default_routing_key
        if self.max_priority is not None:
            if queue.queue_arguments is None:
                queue.queue_arguments = {}
            self._set_max_priority(queue.queue_arguments)
        self[queue.name] = queue
        return queue

    def _set_max_priority(self, args):
        """Set ``x-max-priority`` in ``args`` unless it is already there."""
        if 'x-max-priority' in args or self.max_priority is None:
            return None
        return args.update({'x-max-priority': self.max_priority})

    def format(self, indent=0, indent_first=True):
        """Format routing table into string for log dumps."""
        active = self.consume_from
        if not active:
            return ''
        template = QUEUE_FORMAT.strip()
        lines = [
            template.format(queue) for _, queue in sorted(active.items())
        ]
        if not indent_first:
            # The first line is left alone; only the rest is indented.
            head, rest = lines[0], lines[1:]
            return head + '\n' + textindent('\n'.join(rest), indent)
        return textindent('\n'.join(lines), indent)

    def select_add(self, queue, **kwargs):
        """Add new task queue that'll be consumed from.

        The queue will be active even when a subset has been selected
        using the :option:`celery worker -Q` option.
        """
        added = self.add(queue, **kwargs)
        if self._consume_from is not None:
            self._consume_from[added.name] = added
        return added

    def select(self, include):
        """Select a subset of currently defined queues to consume from.

        Arguments:
            include (Sequence[str], str): Names of queues to consume from.
        """
        if not include:
            return
        selection = {}
        for name in maybe_list(include):
            selection[name] = self[name]
        self._consume_from = selection

    def deselect(self, exclude):
        """Deselect queues so that they won't be consumed from.

        Arguments:
            exclude (Sequence[str], str): Names of queues to avoid
                consuming from.
        """
        if not exclude:
            return None
        exclude = maybe_list(exclude)
        if self._consume_from is None:
            # using all queues
            return self.select(k for k in self if k not in exclude)
        # using selection
        for unwanted in exclude:
            self._consume_from.pop(unwanted, None)

    def new_missing(self, name):
        """Build the queue created for an unknown ``name``."""
        return Queue(name, self.autoexchange(name), name)

    @property
    def consume_from(self):
        """The selected subset when one is set, otherwise this mapping."""
        selection = self._consume_from
        return self if selection is None else selection
class AMQP:
    """App AMQP API: app.amqp.

    Bundles, for one app, the queue mapping, the router, the producer pool
    and the functions that build and publish task messages.
    """

    Connection = Connection
    Consumer = Consumer
    Producer = Producer

    #: compat alias to Connection
    BrokerConnection = Connection

    # Class instantiated by :meth:`Queues` to hold the queue mapping.
    queues_cls = Queues

    #: Cached and prepared routing table.
    _rtable = None

    #: Underlying producer pool instance automatically
    #: set by the :attr:`producer_pool`.
    _producer_pool = None

    # Exchange class/function used when defining automatic queues.
    # For example, you can use ``autoexchange = lambda n: None`` to use the
    # AMQP default exchange: a shortcut to bypass routing
    # and instead send directly to the queue named in the routing key.
    autoexchange = None

    #: Max size of positional argument representation used for
    #: logging purposes.
    argsrepr_maxsize = 1024

    #: Max size of keyword argument representation used for logging purposes.
    kwargsrepr_maxsize = 1024

    def __init__(self, app):
        self.app = app
        # Maps the ``task_protocol`` setting to the matching message
        # builder (see :attr:`create_task_message`).
        self.task_protocols = {
            1: self.as_task_v1,
            2: self.as_task_v2,
        }
        # NOTE(review): presumably registers a callback fired on
        # configuration updates -- confirm against the settings class.
        self.app._conf.bind_to(self._handle_conf_update)

    @cached_property
    def create_task_message(self):
        """Message builder selected by the ``task_protocol`` setting."""
        return self.task_protocols[self.app.conf.task_protocol]

    @cached_property
    def send_task_message(self):
        """Publishing function built by :meth:`_create_task_sender`."""
        return self._create_task_sender()

    def Queues(self, queues, create_missing=None,
               autoexchange=None, max_priority=None):
        # Create new :class:`Queues` instance, using queue defaults
        # from the current configuration.
        conf = self.app.conf
        default_routing_key = conf.task_default_routing_key
        if create_missing is None:
            create_missing = conf.task_create_missing_queues
        if max_priority is None:
            max_priority = conf.task_queue_max_priority
        # With no queues given, fall back to a single queue named after
        # the ``task_default_queue`` setting.
        if not queues and conf.task_default_queue:
            queues = (Queue(conf.task_default_queue,
                            exchange=self.default_exchange,
                            routing_key=default_routing_key),)
        autoexchange = (self.autoexchange if autoexchange is None
                        else autoexchange)
        return self.queues_cls(
            queues, self.default_exchange, create_missing,
            autoexchange, max_priority, default_routing_key,
        )

    def Router(self, queues=None, create_missing=None):
        """Return the current task router."""
        return _routes.Router(self.routes, queues or self.queues,
                              self.app.either('task_create_missing_queues',
                                              create_missing), app=self.app)

    def flush_routes(self):
        """Rebuild the routing table from the ``task_routes`` setting."""
        self._rtable = _routes.prepare(self.app.conf.task_routes)

    def TaskConsumer(self, channel, queues=None, accept=None, **kw):
        """Create a :attr:`Consumer` for task messages on ``channel``.

        ``accept`` defaults to the ``accept_content`` setting and
        ``queues`` to the queues currently selected for consuming.
        """
        if accept is None:
            accept = self.app.conf.accept_content
        return self.Consumer(
            channel, accept=accept,
            queues=queues or list(self.queues.consume_from.values()),
            **kw
        )

    def as_task_v2(self, task_id, name, args=None, kwargs=None,
                   countdown=None, eta=None, group_id=None, group_index=None,
                   expires=None, retries=0, chord=None,
                   callbacks=None, errbacks=None, reply_to=None,
                   time_limit=None, soft_time_limit=None,
                   create_sent_event=False, root_id=None, parent_id=None,
                   shadow=None, chain=None, now=None, timezone=None,
                   origin=None, ignore_result=False, argsrepr=None, kwargsrepr=None, stamped_headers=None,
                   **options):
        """Build a protocol version 2 task message.

        Task metadata travels in the message headers; the body is the
        tuple ``(args, kwargs, embed)`` where ``embed`` holds callbacks,
        errbacks, chain and chord.

        Returns:
            task_message: headers, properties, body and, only when
                ``create_sent_event`` is true, the sent-event fields.

        Raises:
            TypeError: if ``args`` is not a list/tuple or ``kwargs`` is
                not a mapping.
            ValueError: if ``countdown`` or a numeric ``expires`` is below
                ``INT_MIN``.
            KeyError: if a name in ``stamped_headers`` is missing from
                ``options``.
        """
        args = args or ()
        kwargs = kwargs or {}
        if not isinstance(args, (list, tuple)):
            raise TypeError('task args must be a list or tuple')
        if not isinstance(kwargs, Mapping):
            raise TypeError('task keyword arguments must be a mapping')
        if countdown:  # convert countdown to ETA
            self._verify_seconds(countdown, 'countdown')
            now = now or self.app.now()
            timezone = timezone or self.app.timezone
            eta = maybe_make_aware(
                now + timedelta(seconds=countdown), tz=timezone,
            )
        # A numeric ``expires`` is a number of seconds from now.
        if isinstance(expires, numbers.Real):
            self._verify_seconds(expires, 'expires')
            now = now or self.app.now()
            timezone = timezone or self.app.timezone
            expires = maybe_make_aware(
                now + timedelta(seconds=expires), tz=timezone,
            )

        # Strings are passed through as-is; other truthy values are
        # converted with ``isoformat()``.
        if not isinstance(eta, str):
            eta = eta and eta.isoformat()
        # If we retry a task `expires` will already be ISO8601-formatted.
        if not isinstance(expires, str):
            expires = expires and expires.isoformat()

        if argsrepr is None:
            argsrepr = saferepr(args, self.argsrepr_maxsize)
        if kwargsrepr is None:
            kwargsrepr = saferepr(kwargs, self.kwargsrepr_maxsize)

        if not root_id:  # empty root_id defaults to task_id
            root_id = task_id

        # The value of every stamped header is taken from ``options``.
        stamps = {header: options[header] for header in stamped_headers or []}
        headers = {
            'lang': 'py',
            'task': name,
            'id': task_id,
            'shadow': shadow,
            'eta': eta,
            'expires': expires,
            'group': group_id,
            'group_index': group_index,
            'retries': retries,
            'timelimit': [time_limit, soft_time_limit],
            'root_id': root_id,
            'parent_id': parent_id,
            'argsrepr': argsrepr,
            'kwargsrepr': kwargsrepr,
            'origin': origin or anon_nodename(),
            'ignore_result': ignore_result,
            'stamped_headers': stamped_headers,
            'stamps': stamps,
        }

        return task_message(
            headers=headers,
            properties={
                'correlation_id': task_id,
                'reply_to': reply_to or '',
            },
            body=(
                args, kwargs, {
                    'callbacks': callbacks,
                    'errbacks': errbacks,
                    'chain': chain,
                    'chord': chord,
                },
            ),
            sent_event={
                'uuid': task_id,
                'root_id': root_id,
                'parent_id': parent_id,
                'name': name,
                'args': argsrepr,
                'kwargs': kwargsrepr,
                'retries': retries,
                'eta': eta,
                'expires': expires,
            } if create_sent_event else None,
        )

    def as_task_v1(self, task_id, name, args=None, kwargs=None,
                   countdown=None, eta=None, group_id=None, group_index=None,
                   expires=None, retries=0,
                   chord=None, callbacks=None, errbacks=None, reply_to=None,
                   time_limit=None, soft_time_limit=None,
                   create_sent_event=False, root_id=None, parent_id=None,
                   shadow=None, now=None, timezone=None,
                   **compat_kwargs):
        """Build a protocol version 1 task message.

        All task fields travel in the body dict and the headers are
        empty.  ``root_id``, ``parent_id``, ``shadow``, ``timezone`` and
        any extra keyword arguments are accepted but not used.

        Returns:
            task_message: headers, properties, body and, only when
                ``create_sent_event`` is true, the sent-event fields.

        Raises:
            TypeError: if ``args`` is not a list/tuple or ``kwargs`` is
                not a mapping.
            ValueError: if ``countdown`` or a numeric ``expires`` is below
                ``INT_MIN``.
        """
        args = args or ()
        kwargs = kwargs or {}
        utc = self.utc
        if not isinstance(args, (list, tuple)):
            raise TypeError('task args must be a list or tuple')
        if not isinstance(kwargs, Mapping):
            raise TypeError('task keyword arguments must be a mapping')
        if countdown:  # convert countdown to ETA
            self._verify_seconds(countdown, 'countdown')
            now = now or self.app.now()
            eta = now + timedelta(seconds=countdown)
        # A numeric ``expires`` is a number of seconds from now.
        if isinstance(expires, numbers.Real):
            self._verify_seconds(expires, 'expires')
            now = now or self.app.now()
            expires = now + timedelta(seconds=expires)
        eta = eta and eta.isoformat()
        expires = expires and expires.isoformat()

        return task_message(
            headers={},
            properties={
                'correlation_id': task_id,
                'reply_to': reply_to or '',
            },
            body={
                'task': name,
                'id': task_id,
                'args': args,
                'kwargs': kwargs,
                'group': group_id,
                'group_index': group_index,
                'retries': retries,
                'eta': eta,
                'expires': expires,
                'utc': utc,
                'callbacks': callbacks,
                'errbacks': errbacks,
                'timelimit': (time_limit, soft_time_limit),
                'taskset': group_id,
                'chord': chord,
            },
            sent_event={
                'uuid': task_id,
                'name': name,
                'args': saferepr(args),
                'kwargs': saferepr(kwargs),
                'retries': retries,
                'eta': eta,
                'expires': expires,
            } if create_sent_event else None,
        )

    def _verify_seconds(self, s, what):
        """Return ``s``, raising :exc:`ValueError` if it is below ``INT_MIN``.

        ``what`` names the value in the error message.
        """
        if s < INT_MIN:
            raise ValueError(f'{what} is out of range: {s!r}')
        return s

    def _create_task_sender(self):
        """Build and return the function that publishes task messages.

        Settings, signal senders and receiver lists are read here once
        and captured by the returned closure.
        """
        default_retry = self.app.conf.task_publish_retry
        default_policy = self.app.conf.task_publish_retry_policy
        default_delivery_mode = self.app.conf.task_default_delivery_mode
        default_queue = self.default_queue
        queues = self.queues
        send_before_publish = signals.before_task_publish.send
        before_receivers = signals.before_task_publish.receivers
        send_after_publish = signals.after_task_publish.send
        after_receivers = signals.after_task_publish.receivers

        send_task_sent = signals.task_sent.send   # XXX compat
        sent_receivers = signals.task_sent.receivers

        default_evd = self._event_dispatcher
        default_exchange = self.default_exchange

        default_rkey = self.app.conf.task_default_routing_key
        default_serializer = self.app.conf.task_serializer
        default_compressor = self.app.conf.task_compression

        def send_task_message(producer, name, message,
                              exchange=None, routing_key=None, queue=None,
                              event_dispatcher=None,
                              retry=None, retry_policy=None,
                              serializer=None, delivery_mode=None,
                              compression=None, declare=None,
                              headers=None, exchange_type=None, **kwargs):
            # ``message`` is a task_message tuple; caller-supplied headers
            # and extra keyword arguments are merged into its headers and
            # properties respectively.
            retry = default_retry if retry is None else retry
            headers2, properties, body, sent_event = message
            if headers:
                headers2.update(headers)
            if kwargs:
                properties.update(kwargs)

            # Resolve the destination: a queue name is looked up in the
            # queue mapping, and the default queue is used when neither a
            # queue nor an exchange was given.
            qname = queue
            if queue is None and exchange is None:
                queue = default_queue
            if queue is not None:
                if isinstance(queue, str):
                    qname, queue = queue, queues[queue]
                else:
                    qname = queue.name

            # Prefer the delivery mode of the queue's exchange, then the
            # configured default.
            if delivery_mode is None:
                try:
                    delivery_mode = queue.exchange.delivery_mode
                except AttributeError:
                    pass
                delivery_mode = delivery_mode or default_delivery_mode

            if exchange_type is None:
                try:
                    exchange_type = queue.exchange.type
                except AttributeError:
                    exchange_type = 'direct'

            # convert to anon-exchange, when exchange not set and direct ex.
            if (not exchange or not routing_key) and exchange_type == 'direct':
                exchange, routing_key = '', qname
            elif exchange is None:
                # not topic exchange, and exchange not undefined
                exchange = queue.exchange.name or default_exchange
                routing_key = routing_key or queue.routing_key or default_rkey
            if declare is None and queue and not isinstance(queue, Broadcast):
                declare = [queue]

            # merge default and custom policy
            retry = default_retry if retry is None else retry
            _rp = (dict(default_policy, **retry_policy) if retry_policy
                   else default_policy)

            # Signals are only sent when they have receivers.
            if before_receivers:
                send_before_publish(
                    sender=name, body=body,
                    exchange=exchange, routing_key=routing_key,
                    declare=declare, headers=headers2,
                    properties=properties, retry_policy=retry_policy,
                )
            ret = producer.publish(
                body,
                exchange=exchange,
                routing_key=routing_key,
                serializer=serializer or default_serializer,
                compression=compression or default_compressor,
                retry=retry, retry_policy=_rp,
                delivery_mode=delivery_mode, declare=declare,
                headers=headers2,
                **properties
            )
            if after_receivers:
                send_after_publish(sender=name, body=body, headers=headers2,
                                   exchange=exchange, routing_key=routing_key)
            if sent_receivers:  # XXX deprecated
                if isinstance(body, tuple):  # protocol version 2
                    send_task_sent(
                        sender=name, task_id=headers2['id'], task=name,
                        args=body[0], kwargs=body[1],
                        eta=headers2['eta'], taskset=headers2['group'],
                    )
                else:  # protocol version 1
                    send_task_sent(
                        sender=name, task_id=body['id'], task=name,
                        args=body['args'], kwargs=body['kwargs'],
                        eta=body['eta'], taskset=body['taskset'],
                    )
            # Publish the ``task-sent`` event with the final routing
            # details added.
            if sent_event:
                evd = event_dispatcher or default_evd
                exname = exchange
                if isinstance(exname, Exchange):
                    exname = exname.name
                sent_event.update({
                    'queue': qname,
                    'exchange': exname,
                    'routing_key': routing_key,
                })
                evd.publish('task-sent', sent_event,
                            producer, retry=retry, retry_policy=retry_policy)
            return ret
        return send_task_message

    @cached_property
    def default_queue(self):
        """Queue named by the ``task_default_queue`` setting."""
        return self.queues[self.app.conf.task_default_queue]

    @cached_property
    def queues(self):
        """Queue name⇒ declaration mapping."""
        return self.Queues(self.app.conf.task_queues)

    @queues.setter
    def queues(self, queues):
        # NOTE(review): the returned mapping is presumably what the
        # cached property stores -- confirm against kombu.
        return self.Queues(queues)

    @property
    def routes(self):
        """Prepared routing table, built on first access."""
        if self._rtable is None:
            self.flush_routes()
        return self._rtable

    @cached_property
    def router(self):
        """Task router created by :meth:`Router`."""
        return self.Router()

    @router.setter
    def router(self, value):
        return value

    @property
    def producer_pool(self):
        """Producer pool for the app's write connection.

        Created on first access, with its limit copied from
        ``app.pool.limit``.
        """
        if self._producer_pool is None:
            self._producer_pool = pools.producers[
                self.app.connection_for_write()]
            self._producer_pool.limit = self.app.pool.limit
        return self._producer_pool
    publisher_pool = producer_pool  # compat alias

    @cached_property
    def default_exchange(self):
        """Exchange built from the default exchange name and type settings."""
        return Exchange(self.app.conf.task_default_exchange,
                        self.app.conf.task_default_exchange_type)

    @cached_property
    def utc(self):
        """Value of the ``enable_utc`` setting."""
        return self.app.conf.enable_utc

    @cached_property
    def _event_dispatcher(self):
        # We call Dispatcher.publish with a custom producer
        # so don't need the dispatcher to be enabled.
        return self.app.events.Dispatcher(enabled=False)

    def _handle_conf_update(self, *args, **kwargs):
        """Rebuild routes and router when ``task_routes`` is updated."""
        if ('task_routes' in kwargs or 'task_routes' in args):
            self.flush_routes()
            self.router = self.Router()
        return

View File

@@ -0,0 +1,52 @@
"""Task Annotations.
Annotations is a nice term for monkey-patching task classes
in the configuration.
This prepares and performs the annotations in the
:setting:`task_annotations` setting.
"""
from celery.utils.functional import firstmethod, mlazy
from celery.utils.imports import instantiate
_first_match = firstmethod('annotate')
_first_match_any = firstmethod('annotate_any')
__all__ = ('MapAnnotation', 'prepare', 'resolve_all')
class MapAnnotation(dict):
    """Annotation map: task_name => attributes."""

    def annotate_any(self):
        """Return a copy of the ``'*'`` entry, or None if there is none."""
        return self._copy_entry('*')

    def annotate(self, task):
        """Return a copy of the entry for ``task.name``, or None."""
        return self._copy_entry(task.name)

    def _copy_entry(self, key):
        # Callers receive a fresh dict, never the stored value itself.
        try:
            return dict(self[key])
        except KeyError:
            return None
def prepare(annotations):
    """Expand the :setting:`task_annotations` setting.

    A single annotation is treated as a one-element sequence.  Dicts
    become :class:`MapAnnotation` instances, strings are wrapped for lazy
    instantiation, and anything else is kept as given.

    Returns:
        list: The expanded annotations, or an empty tuple for None.
    """
    if annotations is None:
        return ()
    if not isinstance(annotations, (list, tuple)):
        annotations = (annotations,)

    expanded = []
    for entry in annotations:
        if isinstance(entry, dict):
            entry = MapAnnotation(entry)
        elif isinstance(entry, str):
            entry = mlazy(instantiate, entry)
        expanded.append(entry)
    return expanded
def resolve_all(anno, task):
    """Resolve all pending annotations.

    Returns:
        A generator over the truthy results: the task-specific match
        first, then the catch-all match.
    """
    # Both lookups run immediately; only the filtering is lazy.
    specific = _first_match(anno, task)
    catch_all = _first_match_any(anno)
    return (match for match in (specific, catch_all) if match)

View File

@@ -0,0 +1,66 @@
"""Tasks auto-retry functionality."""
from vine.utils import wraps
from celery.exceptions import Ignore, Retry
from celery.utils.time import get_exponential_backoff_interval
def add_autoretry_behaviour(task, **options):
    """Wrap task's `run` method with auto-retry functionality.

    Each setting is taken from ``options`` first and from the attribute of
    the same name on ``task`` otherwise.  The task is wrapped only when
    ``autoretry_for`` is non-empty and it has not been wrapped before (no
    ``_orig_run`` attribute).

    Arguments:
        task: Task object whose ``run`` method is wrapped in place.
        **options: Overrides for ``autoretry_for``, ``dont_autoretry_for``,
            ``retry_kwargs``, ``retry_backoff``, ``retry_backoff_max`` and
            ``retry_jitter``.
    """
    def _setting(name, default):
        return options.get(name, getattr(task, name, default))

    autoretry_for = tuple(_setting('autoretry_for', ()))
    dont_autoretry_for = tuple(_setting('dont_autoretry_for', ()))
    retry_kwargs = _setting('retry_kwargs', {})
    retry_backoff = float(_setting('retry_backoff', False))
    retry_backoff_max = int(_setting('retry_backoff_max', 600))
    retry_jitter = _setting('retry_jitter', True)

    if autoretry_for and not hasattr(task, '_orig_run'):

        @wraps(task.run)
        def run(*args, **kwargs):
            try:
                return task._orig_run(*args, **kwargs)
            except Ignore:
                # If Ignore signal occurs task shouldn't be retried,
                # even if it suits autoretry_for list
                raise
            except Retry:
                raise
            except dont_autoretry_for:
                raise
            except autoretry_for as exc:
                # Work on a per-call copy: ``retry_kwargs`` is shared by
                # every invocation, so writing into it would carry this
                # call's countdown and max_retries over to later calls.
                call_kwargs = dict(retry_kwargs)
                if retry_backoff:
                    call_kwargs['countdown'] = \
                        get_exponential_backoff_interval(
                            factor=int(max(1.0, retry_backoff)),
                            retries=task.request.retries,
                            maximum=retry_backoff_max,
                            full_jitter=retry_jitter)
                # Override max_retries
                if hasattr(task, 'override_max_retries'):
                    call_kwargs['max_retries'] = getattr(
                        task, 'override_max_retries', task.max_retries)
                ret = task.retry(exc=exc, **call_kwargs)
                # Stop propagation
                if hasattr(task, 'override_max_retries'):
                    delattr(task, 'override_max_retries')
                raise ret

        # Keep the original available for the wrapper to call.
        task._orig_run, task.run = task.run, run

View File

@@ -0,0 +1,68 @@
"""Backend selection."""
import sys
import types
from celery._state import current_app
from celery.exceptions import ImproperlyConfigured, reraise
from celery.utils.imports import load_extension_class_names, symbol_by_name
# Public names exported by this module.
__all__ = ('by_name', 'by_url')
# Error template used by ``by_name``: ``{0}`` is the requested backend and
# ``{1}`` is the underlying error (or an explanatory message).
UNKNOWN_BACKEND = """
Unknown result backend: {0!r}. Did you spell that correctly? ({1!r})
"""
# Short backend names mapped to the import path of the backend class.
# ``by_name`` merges these with the loader overrides and extension entries.
# NOTE(review): 'rpc' uses a dotted path while every other entry uses the
# ``module:Class`` form -- presumably ``symbol_by_name`` accepts both; confirm.
BACKEND_ALIASES = {
    'rpc': 'celery.backends.rpc.RPCBackend',
    'cache': 'celery.backends.cache:CacheBackend',
    'redis': 'celery.backends.redis:RedisBackend',
    'rediss': 'celery.backends.redis:RedisBackend',
    'sentinel': 'celery.backends.redis:SentinelBackend',
    'mongodb': 'celery.backends.mongodb:MongoBackend',
    'db': 'celery.backends.database:DatabaseBackend',
    'database': 'celery.backends.database:DatabaseBackend',
    'elasticsearch': 'celery.backends.elasticsearch:ElasticsearchBackend',
    'cassandra': 'celery.backends.cassandra:CassandraBackend',
    'couchbase': 'celery.backends.couchbase:CouchbaseBackend',
    'couchdb': 'celery.backends.couchdb:CouchBackend',
    'cosmosdbsql': 'celery.backends.cosmosdbsql:CosmosDBSQLBackend',
    'riak': 'celery.backends.riak:RiakBackend',
    'file': 'celery.backends.filesystem:FilesystemBackend',
    'disabled': 'celery.backends.base:DisabledBackend',
    'consul': 'celery.backends.consul:ConsulBackend',
    'dynamodb': 'celery.backends.dynamodb:DynamoDBBackend',
    'azureblockblob': 'celery.backends.azureblockblob:AzureBlockBlobBackend',
    'arangodb': 'celery.backends.arangodb:ArangoDbBackend',
    's3': 'celery.backends.s3:S3Backend',
}
def by_name(backend=None, loader=None,
            extension_namespace='celery.result_backends'):
    """Resolve a backend name or alias to its class.

    Arguments:
        backend (str): Alias or import path; a falsy value selects
            ``'disabled'``.
        loader: Object providing ``override_backends``; defaults to
            ``current_app.loader``.
        extension_namespace (str): Namespace passed to
            ``load_extension_class_names`` to discover extra aliases.

    Raises:
        ImproperlyConfigured: If the lookup raises :exc:`ValueError`, or
            if the name resolves to a module instead of a class.
    """
    if not backend:
        backend = 'disabled'
    if not loader:
        loader = current_app.loader

    # Later sources win: built-in aliases < loader overrides < extensions.
    known = dict(BACKEND_ALIASES, **loader.override_backends)
    known.update(load_extension_class_names(extension_namespace))

    try:
        cls = symbol_by_name(backend, known)
    except ValueError as exc:
        message = UNKNOWN_BACKEND.strip().format(backend, exc)
        reraise(ImproperlyConfigured, ImproperlyConfigured(message),
                sys.exc_info()[2])

    if isinstance(cls, types.ModuleType):
        message = UNKNOWN_BACKEND.strip().format(
            backend, 'is a Python module, not a backend class.')
        raise ImproperlyConfigured(message)
    return cls
def by_url(backend=None, loader=None):
    """Resolve a backend class from a URL or plain name.

    When ``backend`` contains ``'://'`` the scheme selects the backend.
    A scheme of the form ``name+rest`` is split at the first ``'+'``:
    ``name`` selects the backend and everything after the ``'+'`` becomes
    the URL.

    Returns:
        Tuple: ``(backend_class, url)``; ``url`` is ``None`` when
        ``backend`` was not a URL.
    """
    url = None
    if backend and '://' in backend:
        url = backend
        scheme = url.partition('://')[0]
        if '+' in scheme:
            backend, url = url.split('+', 1)
        else:
            backend = scheme
    return by_name(backend, loader), url

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,187 @@
"""Built-in Tasks.
The built-in tasks are always available in all app instances.
"""
from celery._state import connect_on_app_finalize
from celery.utils.log import get_logger
# Nothing is exported by name; every function in this module is decorated
# with ``connect_on_app_finalize``.
__all__ = ()
# Module-level logger named after this module.
logger = get_logger(__name__)
@connect_on_app_finalize
def add_backend_cleanup_task(app):
    """Register the ``celery.backend_cleanup`` task on ``app``.

    The task simply calls ``app.backend.cleanup()``.  If the configured
    backend requires periodic cleanup this task is also automatically
    configured to run every day at 4am (requires :program:`celery beat`
    to be running).
    """

    @app.task(name='celery.backend_cleanup', shared=False, lazy=False)
    def backend_cleanup():
        app.backend.cleanup()

    return backend_cleanup
@connect_on_app_finalize
def add_accumulate_task(app):
    """Register ``celery.accumulate``.

    Used by Task.replace when replacing a task with a group.  The task
    returns all of its positional arguments, or only ``args[index]`` when
    an ``index`` keyword argument is supplied.
    """

    @app.task(bind=True, name='celery.accumulate', shared=False, lazy=False)
    def accumulate(self, *args, **kwargs):
        index = kwargs.get('index')
        if index is None:
            return args
        return args[index]

    return accumulate
@connect_on_app_finalize
def add_unlock_chord_task(app):
    """Task used by result backends without native chord support.

    Joins the chord by creating a task chain that polls the header
    for completion.
    """
    from celery.canvas import maybe_signature
    from celery.exceptions import ChordError
    from celery.result import allow_join_result, result_from_tuple

    @app.task(name='celery.chord_unlock', max_retries=None, shared=False,
              default_retry_delay=app.conf.result_chord_retry_interval, ignore_result=True, lazy=False, bind=True)
    def unlock_chord(self, group_id, callback, interval=None,
                     max_retries=None, result=None,
                     Result=app.AsyncResult, GroupResult=app.GroupResult,
                     result_from_tuple=result_from_tuple, **kwargs):
        # Fall back to the task's configured retry delay.
        if interval is None:
            interval = self.default_retry_delay

        # check if the task group is ready, and if so apply the callback.
        callback = maybe_signature(callback, app)
        # Rebuild the header results from their serialized tuples.
        deps = GroupResult(
            group_id,
            [result_from_tuple(r, app=app) for r in result],
            app=app,
        )
        # Pick the join implementation the result set advertises.
        j = deps.join_native if deps.supports_native_join else deps.join

        try:
            ready = deps.ready()
        except Exception as exc:
            # Failing to query readiness is retried like "not ready yet",
            # but the causing exception is passed along.
            raise self.retry(
                exc=exc, countdown=interval, max_retries=max_retries,
            )
        else:
            if not ready:
                raise self.retry(countdown=interval, max_retries=max_retries)

        # NOTE(review): second conversion of ``callback``; it was already
        # passed through ``maybe_signature`` above.
        callback = maybe_signature(callback, app=app)
        try:
            with allow_join_result():
                ret = j(
                    timeout=app.conf.result_chord_join_timeout,
                    propagate=True,
                )
        except Exception as exc:  # pylint: disable=broad-except
            # Joining failed: name the first failed dependency if one is
            # reported, otherwise fall back to the exception itself.
            try:
                culprit = next(deps._failed_join_report())
                reason = f'Dependency {culprit.id} raised {exc!r}'
            except StopIteration:
                reason = repr(exc)
            logger.exception('Chord %r raised: %r', group_id, exc)
            app.backend.chord_error_from_stack(callback, ChordError(reason))
        else:
            # All header results are in: dispatch the callback with them.
            try:
                callback.delay(ret)
            except Exception as exc:  # pylint: disable=broad-except
                logger.exception('Chord %r raised: %r', group_id, exc)
                app.backend.chord_error_from_stack(
                    callback,
                    exc=ChordError(f'Callback error: {exc!r}'),
                )

    return unlock_chord
@connect_on_app_finalize
def add_map_task(app):
    """Register ``celery.map``.

    The task resolves ``task`` to its task type and calls it once per item
    of ``it``, passing the item as the single argument.
    """
    from celery.canvas import signature

    @app.task(name='celery.map', shared=False, lazy=False)
    def xmap(task, it):
        target = signature(task, app=app).type
        return [target(element) for element in it]

    return xmap
@connect_on_app_finalize
def add_starmap_task(app):
    """Register ``celery.starmap``.

    The task resolves ``task`` to its task type and calls it once per item
    of ``it``, unpacking each item as positional arguments.
    """
    from celery.canvas import signature

    @app.task(name='celery.starmap', shared=False, lazy=False)
    def xstarmap(task, it):
        target = signature(task, app=app).type
        return [target(*arguments) for arguments in it]

    return xstarmap
@connect_on_app_finalize
def add_chunk_task(app):
    """Register ``celery.chunks``.

    The task delegates to ``celery.canvas.chunks.apply_chunks`` with the
    given task, iterable and chunk size ``n``.
    """
    from celery.canvas import chunks as _chunks

    @app.task(name='celery.chunks', shared=False, lazy=False)
    def chunks(task, it, n):
        applied = _chunks.apply_chunks(task, it, n)
        return applied

    return chunks
@connect_on_app_finalize
def add_group_task(app):
    """Register ``celery.group``.

    No longer used, but here for backwards compatibility.
    """
    from celery.canvas import maybe_signature
    from celery.result import result_from_tuple

    @app.task(name='celery.group', bind=True, shared=False, lazy=False)
    def group(self, tasks, result, group_id, partial_args, add_to_parent=True):
        app = self.app
        result = result_from_tuple(result, app)
        # any partial args are added to all tasks in the group
        signatures = (maybe_signature(member, app=app).clone(partial_args)
                      for member in tasks)
        # Publish every member through one shared producer.
        with app.producer_or_acquire() as producer:
            for sig in signatures:
                sig.apply_async(group_id=group_id, producer=producer,
                                add_to_parent=False)
        parent = app.current_worker_task
        if add_to_parent and parent:
            parent.add_trail(result)
        return result

    return group
@connect_on_app_finalize
def add_chain_task(app):
    """Register ``celery.chain``.

    No longer used, but here for backwards compatibility.  Calling the
    task always raises :exc:`NotImplementedError`.
    """

    @app.task(name='celery.chain', shared=False, lazy=False)
    def chain(*args, **kwargs):
        raise NotImplementedError('chain is not a real task')

    return chain
@connect_on_app_finalize
def add_chord_task(app):
    """Register ``celery.chord``.

    No longer used, but here for backwards compatibility.
    """
    from celery import chord as _chord
    from celery import group
    from celery.canvas import maybe_signature

    @app.task(name='celery.chord', bind=True, ignore_result=False,
              shared=False, lazy=False)
    def chord(self, header, body, partial_args=(), interval=None,
              countdown=1, max_retries=None, eager=False, **kwargs):
        app = self.app
        # - convert back to group if serialized
        if isinstance(header, group):
            members = header.tasks
        else:
            members = header
        header = group(
            [maybe_signature(member, app=app) for member in members],
            app=self.app,
        )
        body = maybe_signature(body, app=app)
        canvas = _chord(header, body)
        return canvas.run(header, body, partial_args, app, interval,
                          countdown, max_retries, **kwargs)

    return chord

View File

@@ -0,0 +1,779 @@
"""Worker Remote Control Client.
Client for worker remote control commands.
Server implementation is in :mod:`celery.worker.control`.
There are two types of remote control commands:
* Inspect commands: Have no side effects; they usually just return some value
found in the worker, such as the list of currently registered tasks or the list of active tasks.
Commands are accessible via the :class:`Inspect` class.
* Control commands: Perform side effects, such as adding a new queue to consume from.
Commands are accessible via the :class:`Control` class.
"""
import warnings
from billiard.common import TERM_SIGNAME
from kombu.matcher import match
from kombu.pidbox import Mailbox
from kombu.utils.compat import register_after_fork
from kombu.utils.functional import lazy
from kombu.utils.objects import cached_property
from celery.exceptions import DuplicateNodenameWarning
from celery.utils.log import get_logger
from celery.utils.text import pluralize
# Public names exported by this module.
__all__ = ('Inspect', 'Control', 'flatten_reply')
# Module-level logger named after this module.
logger = get_logger(__name__)
# Warning text emitted by ``flatten_reply``: ``{0}`` receives the (possibly
# pluralized) word 'name' and ``{1}`` the comma-separated duplicate names.
W_DUPNODE = """\
Received multiple replies from node {0}: {1}.
Please make sure you give each node a unique nodename using
the celery worker `-n` option.\
"""
def flatten_reply(reply):
    """Merge a list of per-node replies into a single dictionary.

    Converts replies in this format::

        [{'a@example.com': reply},
         {'b@example.com': reply}]

    into this format::

        {'a@example.com': reply,
         'b@example.com': reply}

    When the same node name occurs more than once the last reply wins and
    a :class:`DuplicateNodenameWarning` listing the repeated names is
    emitted.
    """
    merged = {}
    repeated = set()
    for node_reply in reply:
        # Record clashes before merging, otherwise they cannot be detected.
        repeated.update(name for name in node_reply if name in merged)
        merged.update(node_reply)
    if repeated:
        noun = pluralize(len(repeated), 'name')
        listing = ', '.join(sorted(repeated))
        warnings.warn(DuplicateNodenameWarning(
            W_DUPNODE.format(noun, listing)))
    return merged
def _after_fork_cleanup_control(control):
try:
control._after_fork()
except Exception as exc: # pylint: disable=broad-except
logger.info('after fork raised exception: %r', exc, exc_info=1)
class Inspect:
    """API for inspecting workers.

    Every public method sends one inspect command through
    ``app.control.broadcast`` (with ``reply=True``) and returns the
    replies.  The commands themselves are defined in
    :py:mod:`celery.worker.control`.
    """

    app = None

    def __init__(self, destination=None, timeout=1.0, callback=None,
                 connection=None, app=None, limit=None, pattern=None,
                 matcher=None):
        """Store the broadcast options used by every request.

        ``destination``, ``timeout``, ``callback``, ``connection``,
        ``limit``, ``pattern`` and ``matcher`` are forwarded unchanged to
        ``app.control.broadcast``.
        """
        self.app = app or self.app
        self.destination = destination
        self.pattern = pattern
        self.matcher = matcher
        self.timeout = timeout
        self.limit = limit
        self.callback = callback
        self.connection = connection

    def _prepare(self, reply):
        """Turn the raw broadcast ``reply`` into the public return value.

        * A falsy reply yields ``None``.
        * With a single (non list/tuple) destination, only that node's
          reply is returned.
        * With a pattern, only nodes matching it are kept.
        * Otherwise the flattened ``{node: reply}`` mapping is returned.
        """
        if not reply:
            return None
        by_node = flatten_reply(reply)
        destination = self.destination
        if destination and not isinstance(destination, (list, tuple)):
            return by_node.get(destination)
        if self.pattern:
            pattern, matcher = self.pattern, self.matcher
            return {
                node: node_reply
                for node, node_reply in by_node.items()
                if match(node, pattern, matcher)
            }
        return by_node

    def _request(self, command, **kwargs):
        """Broadcast ``command`` with ``kwargs`` as its arguments."""
        replies = self.app.control.broadcast(
            command,
            arguments=kwargs,
            destination=self.destination,
            callback=self.callback,
            connection=self.connection,
            limit=self.limit,
            timeout=self.timeout,
            reply=True,
            pattern=self.pattern,
            matcher=self.matcher,
        )
        return self._prepare(replies)

    def report(self):
        """Return human readable report for each worker.

        Returns:
            Dict: Dictionary ``{HOSTNAME: {'ok': REPORT_STRING}}``.
        """
        return self._request('report')

    def clock(self):
        """Get the Clock value on workers.

        >>> app.control.inspect().clock()
        {'celery@node1': {'clock': 12}}

        Returns:
            Dict: Dictionary ``{HOSTNAME: CLOCK_VALUE}``.
        """
        return self._request('clock')

    def active(self, safe=None):
        """Return list of tasks currently executed by workers.

        Arguments:
            safe (Boolean): Set to True to disable deserialization.

        Returns:
            Dict: Dictionary ``{HOSTNAME: [TASK_INFO,...]}``.

        See Also:
            For ``TASK_INFO`` details see :func:`query_task` return value.
        """
        return self._request('active', safe=safe)

    def scheduled(self, safe=None):
        """Return list of scheduled tasks with details.

        Note:
            ``safe`` is accepted but not forwarded with the request.

        Returns:
            Dict: Dictionary ``{HOSTNAME: [TASK_SCHEDULED_INFO,...]}``.

        Here is the list of ``TASK_SCHEDULED_INFO`` fields:

        * ``eta`` - scheduled time for task execution as string in ISO 8601 format
        * ``priority`` - priority of the task
        * ``request`` - field containing ``TASK_INFO`` value.

        See Also:
            For more details about ``TASK_INFO`` see :func:`query_task` return value.
        """
        return self._request('scheduled')

    def reserved(self, safe=None):
        """Return list of currently reserved tasks, not including scheduled/active.

        Note:
            ``safe`` is accepted but not forwarded with the request.

        Returns:
            Dict: Dictionary ``{HOSTNAME: [TASK_INFO,...]}``.

        See Also:
            For ``TASK_INFO`` details see :func:`query_task` return value.
        """
        return self._request('reserved')

    def stats(self):
        """Return statistics of worker.

        Returns:
            Dict: Dictionary ``{HOSTNAME: STAT_INFO}``.

        Here is the list of ``STAT_INFO`` fields:

        * ``broker`` - Section for broker information.
            * ``connect_timeout`` - Timeout in seconds (int/float) for establishing a new connection.
            * ``heartbeat`` - Current heartbeat value (set by client).
            * ``hostname`` - Node name of the remote broker.
            * ``insist`` - No longer used.
            * ``login_method`` - Login method used to connect to the broker.
            * ``port`` - Port of the remote broker.
            * ``ssl`` - SSL enabled/disabled.
            * ``transport`` - Name of transport used (e.g., amqp or redis)
            * ``transport_options`` - Options passed to transport.
            * ``uri_prefix`` - Some transports expect the host name to be a URL.
              E.g. ``redis+socket:///tmp/redis.sock``.
              In this example the URI-prefix will be redis.
            * ``userid`` - User id used to connect to the broker with.
            * ``virtual_host`` - Virtual host used.
        * ``clock`` - Value of the workers logical clock. This is a positive integer
          and should be increasing every time you receive statistics.
        * ``uptime`` - Number of seconds since the worker controller was started
        * ``pid`` - Process id of the worker instance (Main process).
        * ``pool`` - Pool-specific section.
            * ``max-concurrency`` - Max number of processes/threads/green threads.
            * ``max-tasks-per-child`` - Max number of tasks a thread may execute before being recycled.
            * ``processes`` - List of PIDs (or thread-ids).
            * ``put-guarded-by-semaphore`` - Internal
            * ``timeouts`` - Default values for time limits.
            * ``writes`` - Specific to the prefork pool, this shows the distribution
              of writes to each process in the pool when using async I/O.
        * ``prefetch_count`` - Current prefetch count value for the task consumer.
        * ``rusage`` - System usage statistics. The fields available may be different on your platform.
          From :manpage:`getrusage(2)`:

            * ``stime`` - Time spent in operating system code on behalf of this process.
            * ``utime`` - Time spent executing user instructions.
            * ``maxrss`` - The maximum resident size used by this process (in kilobytes).
            * ``idrss`` - Amount of non-shared memory used for data (in kilobytes times
              ticks of execution)
            * ``isrss`` - Amount of non-shared memory used for stack space
              (in kilobytes times ticks of execution)
            * ``ixrss`` - Amount of memory shared with other processes
              (in kilobytes times ticks of execution).
            * ``inblock`` - Number of times the file system had to read from the disk
              on behalf of this process.
            * ``oublock`` - Number of times the file system has to write to disk
              on behalf of this process.
            * ``majflt`` - Number of page faults that were serviced by doing I/O.
            * ``minflt`` - Number of page faults that were serviced without doing I/O.
            * ``msgrcv`` - Number of IPC messages received.
            * ``msgsnd`` - Number of IPC messages sent.
            * ``nvcsw`` - Number of times this process voluntarily invoked a context switch.
            * ``nivcsw`` - Number of times an involuntary context switch took place.
            * ``nsignals`` - Number of signals received.
            * ``nswap`` - The number of times this process was swapped entirely
              out of memory.
        * ``total`` - Map of task names and the total number of tasks with that type
          the worker has accepted since start-up.
        """
        return self._request('stats')

    def revoked(self):
        """Return list of revoked tasks.

        >>> app.control.inspect().revoked()
        {'celery@node1': ['16f527de-1c72-47a6-b477-c472b92fef7a']}

        Returns:
            Dict: Dictionary ``{HOSTNAME: [TASK_ID, ...]}``.
        """
        return self._request('revoked')

    def registered(self, *taskinfoitems):
        """Return all registered tasks per worker.

        >>> app.control.inspect().registered()
        {'celery@node1': ['task1', 'task1']}
        >>> app.control.inspect().registered('serializer', 'max_retries')
        {'celery@node1': ['task_foo [serializer=json max_retries=3]', 'task_bar [serializer=json max_retries=3]']}

        Arguments:
            taskinfoitems (Sequence[str]): List of :class:`~celery.app.task.Task`
                attributes to include.

        Returns:
            Dict: Dictionary ``{HOSTNAME: [TASK1_INFO, ...]}``.
        """
        return self._request('registered', taskinfoitems=taskinfoitems)

    # Alias of :meth:`registered`.
    registered_tasks = registered

    def ping(self, destination=None):
        """Ping all (or specific) workers.

        >>> app.control.inspect().ping()
        {'celery@node1': {'ok': 'pong'}, 'celery@node2': {'ok': 'pong'}}
        >>> app.control.inspect().ping(destination=['celery@node1'])
        {'celery@node1': {'ok': 'pong'}}

        Arguments:
            destination (List): If set, a list of the hosts to send the
                command to, when empty broadcast to all workers.

        Note:
            A truthy ``destination`` is stored on the instance, so it also
            applies to later requests made through the same object.

        Returns:
            Dict: Dictionary ``{HOSTNAME: {'ok': 'pong'}}``.

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        if destination:
            self.destination = destination
        return self._request('ping')

    def active_queues(self):
        """Return information about queues from which worker consumes tasks.

        Returns:
            Dict: Dictionary ``{HOSTNAME: [QUEUE_INFO, QUEUE_INFO,...]}``.

        Here is the list of ``QUEUE_INFO`` fields:

        * ``name``
        * ``exchange``
            * ``name``
            * ``type``
            * ``arguments``
            * ``durable``
            * ``passive``
            * ``auto_delete``
            * ``delivery_mode``
            * ``no_declare``
        * ``routing_key``
        * ``queue_arguments``
        * ``binding_arguments``
        * ``consumer_arguments``
        * ``durable``
        * ``exclusive``
        * ``auto_delete``
        * ``no_ack``
        * ``alias``
        * ``bindings``
        * ``no_declare``
        * ``expires``
        * ``message_ttl``
        * ``max_length``
        * ``max_length_bytes``
        * ``max_priority``

        See Also:
            See the RabbitMQ/AMQP documentation for more details about
            ``queue_info`` fields.

        Note:
            The ``queue_info`` fields are RabbitMQ/AMQP oriented.
            Not all fields apply to other transports.
        """
        return self._request('active_queues')

    def query_task(self, *ids):
        """Return detail of tasks currently executed by workers.

        Arguments:
            *ids (str): IDs of tasks to be queried.

        Returns:
            Dict: Dictionary ``{HOSTNAME: {TASK_ID: [STATE, TASK_INFO]}}``.

        Here is the list of ``TASK_INFO`` fields:

        * ``id`` - ID of the task
        * ``name`` - Name of the task
        * ``args`` - Positional arguments passed to the task
        * ``kwargs`` - Keyword arguments passed to the task
        * ``type`` - Type of the task
        * ``hostname`` - Hostname of the worker processing the task
        * ``time_start`` - Time of processing start
        * ``acknowledged`` - True when task was acknowledged to broker
        * ``delivery_info`` - Dictionary containing delivery information
            * ``exchange`` - Name of exchange where task was published
            * ``routing_key`` - Routing key used when task was published
            * ``priority`` - Priority used when task was published
            * ``redelivered`` - True if the task was redelivered
        * ``worker_pid`` - PID of worker processing the task
        """
        # The signature used to be unary: query_task(ids=[id1, id2]);
        # a single list/tuple argument is unpacked to preserve backward
        # compatibility.
        legacy_call = len(ids) == 1 and isinstance(ids[0], (list, tuple))
        task_ids = ids[0] if legacy_call else ids
        return self._request('query_task', ids=task_ids)

    def conf(self, with_defaults=False):
        """Return configuration of each worker.

        Arguments:
            with_defaults (bool): if set to True, method returns also
                configuration options with default values.

        Returns:
            Dict: Dictionary ``{HOSTNAME: WORKER_CONFIGURATION}``.

        See Also:
            ``WORKER_CONFIGURATION`` is a dictionary containing current configuration options.
            See :ref:`configuration` for possible values.
        """
        return self._request('conf', with_defaults=with_defaults)

    def hello(self, from_node, revoked=None):
        """Send the ``hello`` command with ``from_node`` and ``revoked``."""
        return self._request('hello', from_node=from_node, revoked=revoked)

    def memsample(self):
        """Return sample current RSS memory usage.

        Note:
            Requires the psutil library.
        """
        return self._request('memsample')

    def memdump(self, samples=10):
        """Dump statistics of previous memsample requests.

        Note:
            Requires the psutil library.
        """
        return self._request('memdump', samples=samples)

    def objgraph(self, type='Request', n=200, max_depth=10):
        """Create graph of uncollected objects (memory-leak debugging).

        Arguments:
            n (int): Max number of objects to graph.
            max_depth (int): Traverse at most n levels deep.
            type (str): Name of object to graph.  Default is ``"Request"``.

        Returns:
            Dict: Dictionary ``{'filename': FILENAME}``

        Note:
            Requires the objgraph library.
        """
        # ``n`` travels under the argument name ``num``.
        return self._request('objgraph', num=n, max_depth=max_depth, type=type)
class Control:
    """Worker remote control client."""

    # Mailbox implementation; kept as a class attribute so that it can be
    # replaced on a subclass.
    Mailbox = Mailbox

    def __init__(self, app=None):
        """Create the control mailbox from the app configuration.

        Arguments:
            app: Application instance.  Although the parameter defaults to
                ``None``, ``app.conf`` is read immediately, so a real app
                is required.
        """
        self.app = app
        self.mailbox = self.Mailbox(
            app.conf.control_exchange,
            type='fanout',
            accept=app.conf.accept_content,
            serializer=app.conf.task_serializer,
            # Resolved lazily so the producer pool is only looked up on use.
            producer_pool=lazy(lambda: self.app.amqp.producer_pool),
            queue_ttl=app.conf.control_queue_ttl,
            reply_queue_ttl=app.conf.control_queue_ttl,
            queue_expires=app.conf.control_queue_expires,
            reply_queue_expires=app.conf.control_queue_expires,
        )
        register_after_fork(self, _after_fork_cleanup_control)

    def _after_fork(self):
        """Drop the mailbox's producer pool (called after a fork)."""
        del self.mailbox.producer_pool

    @cached_property
    def inspect(self):
        """Create new :class:`Inspect` instance."""
        return self.app.subclass_with_self(Inspect, reverse='control.inspect')

    def purge(self, connection=None):
        """Discard all waiting tasks.

        This will ignore all tasks waiting for execution, and they will
        be deleted from the messaging server.

        Arguments:
            connection (kombu.Connection): Optional specific connection
                instance to use.  If not provided a connection will
                be acquired from the connection pool.

        Returns:
            int: the number of tasks discarded.
        """
        with self.app.connection_or_acquire(connection) as conn:
            return self.app.amqp.TaskConsumer(conn).purge()

    # Alias of :meth:`purge`.
    discard_all = purge

    def election(self, id, topic, action=None, connection=None):
        """Broadcast an ``election`` command to all workers.

        The command carries ``id``, ``topic`` and ``action`` as its
        arguments; no reply is requested and nothing is returned.
        """
        self.broadcast(
            'election', connection=connection, destination=None,
            arguments={
                'id': id, 'topic': topic, 'action': action,
            },
        )

    def revoke(self, task_id, destination=None, terminate=False,
               signal=TERM_SIGNAME, **kwargs):
        """Tell all (or specific) workers to revoke a task by id (or list of ids).

        If a task is revoked, the workers will ignore the task and
        not execute it after all.

        Arguments:
            task_id (Union(str, list)): Id of the task to revoke
                (or list of ids).
            terminate (bool): Also terminate the process currently working
                on the task (if any).
            signal (str): Name of signal to send to process if terminate.
                Default is TERM.

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        return self.broadcast('revoke', destination=destination, arguments={
            'task_id': task_id,
            'terminate': terminate,
            'signal': signal,
        }, **kwargs)

    def revoke_by_stamped_headers(self, headers, destination=None, terminate=False,
                                  signal=TERM_SIGNAME, **kwargs):
        """Tell all (or specific) workers to revoke a task by headers.

        If a task is revoked, the workers will ignore the task and
        not execute it after all.

        Arguments:
            headers (dict[str, Union(str, list)]): Headers to match when revoking tasks.
            terminate (bool): Also terminate the process currently working
                on the task (if any).
            signal (str): Name of signal to send to process if terminate.
                Default is TERM.

        Returns:
            The result of a follow-up :meth:`revoke` when the replies
            contain collections of task ids, otherwise the raw result of
            the ``revoke_by_stamped_headers`` broadcast.

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        result = self.broadcast('revoke_by_stamped_headers', destination=destination, arguments={
            'headers': headers,
            'terminate': terminate,
            'signal': signal,
        }, **kwargs)

        task_ids = set()
        if result:
            for host in result:
                for response in host.values():
                    payload = (response.get('ok')
                               if isinstance(response, dict) else None)
                    # Only a real collection can carry task ids.  Feeding
                    # a status string to ``set.update`` would add every
                    # single character as a bogus "task id", and replies
                    # without an 'ok' entry must not raise KeyError.
                    if isinstance(payload, (set, frozenset, list, tuple)):
                        task_ids.update(payload)

        if task_ids:
            return self.revoke(list(task_ids), destination=destination, terminate=terminate, signal=signal, **kwargs)
        return result

    def terminate(self, task_id,
                  destination=None, signal=TERM_SIGNAME, **kwargs):
        """Tell all (or specific) workers to terminate a task by id (or list of ids).

        See Also:
            This is just a shortcut to :meth:`revoke` with the terminate
            argument enabled.
        """
        return self.revoke(
            task_id,
            destination=destination, terminate=True, signal=signal, **kwargs)

    def ping(self, destination=None, timeout=1.0, **kwargs):
        """Ping all (or specific) workers.

        >>> app.control.ping()
        [{'celery@node1': {'ok': 'pong'}}, {'celery@node2': {'ok': 'pong'}}]
        >>> app.control.ping(destination=['celery@node2'])
        [{'celery@node2': {'ok': 'pong'}}]

        Returns:
            List[Dict]: List of ``{HOSTNAME: {'ok': 'pong'}}`` dictionaries.

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        return self.broadcast(
            'ping', reply=True, arguments={}, destination=destination,
            timeout=timeout, **kwargs)

    def rate_limit(self, task_name, rate_limit, destination=None, **kwargs):
        """Tell workers to set a new rate limit for task by type.

        Arguments:
            task_name (str): Name of task to change rate limit for.
            rate_limit (int, str): The rate limit as tasks per second,
                or a rate limit string (`'100/m'`, etc.
                see :attr:`celery.app.task.Task.rate_limit` for
                more information).

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        return self.broadcast(
            'rate_limit',
            destination=destination,
            arguments={
                'task_name': task_name,
                'rate_limit': rate_limit,
            },
            **kwargs)

    def add_consumer(self, queue,
                     exchange=None, exchange_type='direct', routing_key=None,
                     options=None, destination=None, **kwargs):
        """Tell all (or specific) workers to start consuming from a new queue.

        Only the queue name is required as if only the queue is specified
        then the exchange/routing key will be set to the same name (
        like automatic queues do).

        Note:
            This command does not respect the default queue/exchange
            options in the configuration.

        Arguments:
            queue (str): Name of queue to start consuming from.
            exchange (str): Optional name of exchange.
            exchange_type (str): Type of exchange (defaults to 'direct').
            routing_key (str): Optional routing key.
            options (Dict): Additional options as supported
                by :meth:`kombu.entity.Queue.from_dict`.

        See Also:
            :meth:`broadcast` for supported keyword arguments.
        """
        return self.broadcast(
            'add_consumer',
            destination=destination,
            arguments=dict({
                'queue': queue,
                'exchange': exchange,
                'exchange_type': exchange_type,
                'routing_key': routing_key,
            }, **options or {}),
            **kwargs
        )

    def cancel_consumer(self, queue, destination=None, **kwargs):
        """Tell all (or specific) workers to stop consuming from ``queue``.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'cancel_consumer', destination=destination,
            arguments={'queue': queue}, **kwargs)

    def time_limit(self, task_name, soft=None, hard=None,
                   destination=None, **kwargs):
        """Tell workers to set time limits for a task by type.

        Arguments:
            task_name (str): Name of task to change time limits for.
            soft (float): New soft time limit (in seconds).
            hard (float): New hard time limit (in seconds).
            **kwargs (Any): arguments passed on to :meth:`broadcast`.
        """
        return self.broadcast(
            'time_limit',
            arguments={
                'task_name': task_name,
                'hard': hard,
                'soft': soft,
            },
            destination=destination,
            **kwargs)

    def enable_events(self, destination=None, **kwargs):
        """Tell all (or specific) workers to enable events.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'enable_events', arguments={}, destination=destination, **kwargs)

    def disable_events(self, destination=None, **kwargs):
        """Tell all (or specific) workers to disable events.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'disable_events', arguments={}, destination=destination, **kwargs)

    def pool_grow(self, n=1, destination=None, **kwargs):
        """Tell all (or specific) workers to grow the pool by ``n``.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'pool_grow', arguments={'n': n}, destination=destination, **kwargs)

    def pool_shrink(self, n=1, destination=None, **kwargs):
        """Tell all (or specific) workers to shrink the pool by ``n``.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'pool_shrink', arguments={'n': n},
            destination=destination, **kwargs)

    def autoscale(self, max, min, destination=None, **kwargs):
        """Change worker(s) autoscale setting.

        See Also:
            Supports the same arguments as :meth:`broadcast`.
        """
        return self.broadcast(
            'autoscale', arguments={'max': max, 'min': min},
            destination=destination, **kwargs)

    def shutdown(self, destination=None, **kwargs):
        """Shutdown worker(s).

        See Also:
            Supports the same arguments as :meth:`broadcast`
        """
        return self.broadcast(
            'shutdown', arguments={}, destination=destination, **kwargs)

    def pool_restart(self, modules=None, reload=False, reloader=None,
                     destination=None, **kwargs):
        """Restart the execution pools of all or specific workers.

        Keyword Arguments:
            modules (Sequence[str]): List of modules to reload.
            reload (bool): Flag to enable module reloading.  Default is False.
            reloader (Any): Function to reload a module.
            destination (Sequence[str]): List of worker names to send this
                command to.

        See Also:
            Supports the same arguments as :meth:`broadcast`
        """
        return self.broadcast(
            'pool_restart',
            arguments={
                'modules': modules,
                'reload': reload,
                'reloader': reloader,
            },
            destination=destination, **kwargs)

    def heartbeat(self, destination=None, **kwargs):
        """Tell worker(s) to send a heartbeat immediately.

        See Also:
            Supports the same arguments as :meth:`broadcast`
        """
        return self.broadcast(
            'heartbeat', arguments={}, destination=destination, **kwargs)

    def broadcast(self, command, arguments=None, destination=None,
                  connection=None, reply=False, timeout=1.0, limit=None,
                  callback=None, channel=None, pattern=None, matcher=None,
                  **extra_kwargs):
        """Broadcast a control command to the celery workers.

        Arguments:
            command (str): Name of command to send.
            arguments (Dict): Keyword arguments for the command.
            destination (List): If set, a list of the hosts to send the
                command to, when empty broadcast to all workers.
            connection (kombu.Connection): Custom broker connection to use,
                if not set, a connection will be acquired from the pool.
            reply (bool): Wait for and return the reply.
            timeout (float): Timeout in seconds to wait for the reply.
            limit (int): Limit number of replies.
            callback (Callable): Callback called immediately for
                each reply received.
            channel: Optional channel forwarded to the mailbox.
            pattern (str): Custom pattern string to match
            matcher (Callable): Custom matcher to run the pattern to match
            **extra_kwargs: Merged into ``arguments``; on a key clash the
                extra keyword wins.
        """
        with self.app.connection_or_acquire(connection) as conn:
            arguments = dict(arguments or {}, **extra_kwargs)
            # tests pass easier without requiring pattern/matcher to
            # always be sent in, so they are only forwarded when both
            # are set.
            matching = ({'pattern': pattern, 'matcher': matcher}
                        if pattern and matcher else {})
            return self.mailbox(conn)._broadcast(
                command, arguments, destination, reply, timeout,
                limit, callback, channel=channel, **matching
            )
View File

@@ -0,0 +1,414 @@
"""Configuration introspection and defaults."""
from collections import deque, namedtuple
from datetime import timedelta
from celery.utils.functional import memoize
from celery.utils.serialization import strtobool
# Names exported by ``from <module> import *``.
__all__ = ('Option', 'NAMESPACES', 'flatten', 'find')

# Default of the worker ``pool`` option declared in NAMESPACES.
DEFAULT_POOL = 'prefork'
# Default of the top-level ``accept_content`` option.
DEFAULT_ACCEPT_CONTENT = ('json',)
# Default of the worker ``log_format`` option; the surrounding newlines
# of the literal are removed by ``.strip()``.
DEFAULT_PROCESS_LOG_FMT = """
[%(asctime)s: %(levelname)s/%(processName)s] %(message)s
""".strip()
# Default of the worker ``task_log_format`` option; the backslash joins
# the two physical lines into one format string.
DEFAULT_TASK_LOG_FMT = """[%(asctime)s: %(levelname)s/%(processName)s] \
%(task_name)s[%(task_id)s]: %(message)s"""
# Default of the security ``digest`` option.
DEFAULT_SECURITY_DIGEST = 'sha256'

# Templates for legacy setting names: ``Namespace`` fills ``{0}`` with the
# option key to build the ``old`` names of options that declare none.
OLD_NS = {'celery_{0}'}
OLD_NS_BEAT = {'celerybeat_{0}'}
OLD_NS_WORKER = {'celeryd_{0}'}

# Tuple type returned by ``find``.
searchresult = namedtuple('searchresult', ('namespace', 'key', 'type'))
def Namespace(__old__=None, **options):
    """Build a namespace mapping from keyword options.

    Arguments:
        __old__ (Iterable[str]): Optional legacy-name templates.  Every
            option whose ``old`` attribute is empty gets ``old`` set to
            the templates formatted with the option's key.
        **options: The options (or nested namespaces) by key.

    Returns:
        dict: The ``options`` mapping itself.
    """
    if __old__ is None:
        return options
    for name, option in options.items():
        if option.old:
            # explicit legacy names win over the namespace templates
            continue
        option.old = {template.format(name) for template in __old__}
    return options
def old_ns(ns):
    """Return a one-element set holding the template ``'<ns>_{0}'``.

    The ``{0}`` placeholder is left in place so it can later be filled
    with an option key.
    """
    template = '{}_{{0}}'.format(ns)
    return {template}
class Option:
    """Describes a Celery configuration option.

    Every keyword argument given to the constructor is stored as an
    attribute of the same name, overriding the class-level defaults
    declared below.
    """

    alt = None
    deprecate_by = None
    remove_by = None
    old = set()

    # Maps a type name to the callable used by ``to_python``.
    typemap = {
        'string': str,
        'int': int,
        'float': float,
        'any': lambda v: v,
        'bool': strtobool,
        'dict': dict,
        'tuple': tuple,
    }

    def __init__(self, default=None, *args, **kwargs):
        self.default = default
        self.type = kwargs.get('type') or 'string'
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def to_python(self, value):
        """Convert *value* with the converter registered for ``self.type``."""
        convert = self.typemap[self.type]
        return convert(value)

    def __repr__(self):
        return f'<Option: type->{self.type} default->{self.default!r}>'
# Declaration of every known setting, grouped by namespace.  Top-level
# entries are either an Option or a nested Namespace mapping; ``flatten``
# joins the namespace name and the key with ``_`` to form the setting name.
#
# NOTE(review): the type names 'list' and 'bytes' used below have no entry
# in ``Option.typemap``, so ``to_python`` raises KeyError for those
# options -- confirm whether converters should be added.
NAMESPACES = Namespace(
    accept_content=Option(DEFAULT_ACCEPT_CONTENT, type='list', old=OLD_NS),
    result_accept_content=Option(None, type='list'),
    enable_utc=Option(True, type='bool'),
    imports=Option((), type='tuple', old=OLD_NS),
    include=Option((), type='tuple', old=OLD_NS),
    timezone=Option(type='string', old=OLD_NS),
    beat=Namespace(
        __old__=OLD_NS_BEAT,

        max_loop_interval=Option(0, type='float'),
        schedule=Option({}, type='dict'),
        scheduler=Option('celery.beat:PersistentScheduler'),
        schedule_filename=Option('celerybeat-schedule'),
        sync_every=Option(0, type='int'),
        # the type must be the typemap key 'int', not the ``int`` class
        cron_starting_deadline=Option(None, type='int'),
    ),
    broker=Namespace(
        url=Option(None, type='string'),
        read_url=Option(None, type='string'),
        write_url=Option(None, type='string'),
        transport=Option(type='string'),
        transport_options=Option({}, type='dict'),
        connection_timeout=Option(4, type='float'),
        connection_retry=Option(True, type='bool'),
        connection_retry_on_startup=Option(None, type='bool'),
        connection_max_retries=Option(100, type='int'),
        channel_error_retry=Option(False, type='bool'),
        failover_strategy=Option(None, type='string'),
        heartbeat=Option(120, type='int'),
        # NOTE(review): float default declared as 'int' -- confirm.
        heartbeat_checkrate=Option(3.0, type='int'),
        login_method=Option(None, type='string'),
        pool_limit=Option(10, type='int'),
        use_ssl=Option(False, type='bool'),

        host=Option(type='string'),
        port=Option(type='int'),
        user=Option(type='string'),
        password=Option(type='string'),
        vhost=Option(type='string'),
    ),
    cache=Namespace(
        __old__=old_ns('celery_cache'),

        backend=Option(),
        backend_options=Option({}, type='dict'),
    ),
    cassandra=Namespace(
        entry_ttl=Option(type='float'),
        keyspace=Option(type='string'),
        port=Option(type='string'),
        read_consistency=Option(type='string'),
        servers=Option(type='list'),
        bundle_path=Option(type='string'),
        table=Option(type='string'),
        write_consistency=Option(type='string'),
        auth_provider=Option(type='string'),
        auth_kwargs=Option(type='string'),
        options=Option({}, type='dict'),
    ),
    s3=Namespace(
        access_key_id=Option(type='string'),
        secret_access_key=Option(type='string'),
        bucket=Option(type='string'),
        base_path=Option(type='string'),
        endpoint_url=Option(type='string'),
        region=Option(type='string'),
    ),
    azureblockblob=Namespace(
        container_name=Option('celery', type='string'),
        retry_initial_backoff_sec=Option(2, type='int'),
        retry_increment_base=Option(2, type='int'),
        retry_max_attempts=Option(3, type='int'),
        base_path=Option('', type='string'),
        connection_timeout=Option(20, type='int'),
        read_timeout=Option(120, type='int'),
    ),
    control=Namespace(
        queue_ttl=Option(300.0, type='float'),
        queue_expires=Option(10.0, type='float'),
        exchange=Option('celery', type='string'),
    ),
    couchbase=Namespace(
        __old__=old_ns('celery_couchbase'),

        backend_settings=Option(None, type='dict'),
    ),
    arangodb=Namespace(
        __old__=old_ns('celery_arangodb'),
        backend_settings=Option(None, type='dict')
    ),
    mongodb=Namespace(
        __old__=old_ns('celery_mongodb'),

        backend_settings=Option(type='dict'),
    ),
    cosmosdbsql=Namespace(
        database_name=Option('celerydb', type='string'),
        collection_name=Option('celerycol', type='string'),
        consistency_level=Option('Session', type='string'),
        max_retry_attempts=Option(9, type='int'),
        max_retry_wait_time=Option(30, type='int'),
    ),
    event=Namespace(
        __old__=old_ns('celery_event'),

        queue_expires=Option(60.0, type='float'),
        queue_ttl=Option(5.0, type='float'),
        queue_prefix=Option('celeryev'),
        serializer=Option('json'),
        exchange=Option('celeryev', type='string'),
    ),
    redis=Namespace(
        __old__=old_ns('celery_redis'),

        backend_use_ssl=Option(type='dict'),
        db=Option(type='int'),
        host=Option(type='string'),
        max_connections=Option(type='int'),
        username=Option(type='string'),
        password=Option(type='string'),
        port=Option(type='int'),
        socket_timeout=Option(120.0, type='float'),
        socket_connect_timeout=Option(None, type='float'),
        retry_on_timeout=Option(False, type='bool'),
        socket_keepalive=Option(False, type='bool'),
    ),
    result=Namespace(
        __old__=old_ns('celery_result'),

        backend=Option(type='string'),
        cache_max=Option(
            -1,
            type='int', old={'celery_max_cached_results'},
        ),
        # 'string' is the typemap key (task.compression uses it too);
        # the previous 'str' had no converter.
        compression=Option(type='string'),
        exchange=Option('celeryresults'),
        exchange_type=Option('direct'),
        expires=Option(
            timedelta(days=1),
            type='float', old={'celery_task_result_expires'},
        ),
        persistent=Option(None, type='bool'),
        extended=Option(False, type='bool'),
        serializer=Option('json'),
        backend_transport_options=Option({}, type='dict'),
        chord_retry_interval=Option(1.0, type='float'),
        chord_join_timeout=Option(3.0, type='float'),
        backend_max_sleep_between_retries_ms=Option(10000, type='int'),
        backend_max_retries=Option(float("inf"), type='float'),
        backend_base_sleep_between_retries_ms=Option(10, type='int'),
        backend_always_retry=Option(False, type='bool'),
    ),
    elasticsearch=Namespace(
        __old__=old_ns('celery_elasticsearch'),

        retry_on_timeout=Option(type='bool'),
        max_retries=Option(type='int'),
        timeout=Option(type='float'),
        save_meta_as_text=Option(True, type='bool'),
    ),
    security=Namespace(
        __old__=old_ns('celery_security'),

        certificate=Option(type='string'),
        cert_store=Option(type='string'),
        key=Option(type='string'),
        key_password=Option(type='bytes'),
        digest=Option(DEFAULT_SECURITY_DIGEST, type='string'),
    ),
    database=Namespace(
        url=Option(old={'celery_result_dburi'}),
        engine_options=Option(
            type='dict', old={'celery_result_engine_options'},
        ),
        short_lived_sessions=Option(
            False, type='bool', old={'celery_result_db_short_lived_sessions'},
        ),
        table_schemas=Option(type='dict'),
        table_names=Option(type='dict', old={'celery_result_db_tablenames'}),
    ),
    task=Namespace(
        __old__=OLD_NS,
        acks_late=Option(False, type='bool'),
        acks_on_failure_or_timeout=Option(True, type='bool'),
        always_eager=Option(False, type='bool'),
        annotations=Option(type='any'),
        compression=Option(type='string', old={'celery_message_compression'}),
        create_missing_queues=Option(True, type='bool'),
        inherit_parent_priority=Option(False, type='bool'),
        default_delivery_mode=Option(2, type='string'),
        default_queue=Option('celery'),
        default_exchange=Option(None, type='string'),  # taken from queue
        default_exchange_type=Option('direct'),
        default_routing_key=Option(None, type='string'),  # taken from queue
        default_rate_limit=Option(type='string'),
        default_priority=Option(None, type='string'),
        eager_propagates=Option(
            False, type='bool', old={'celery_eager_propagates_exceptions'},
        ),
        ignore_result=Option(False, type='bool'),
        store_eager_result=Option(False, type='bool'),
        protocol=Option(2, type='int', old={'celery_task_protocol'}),
        publish_retry=Option(
            True, type='bool', old={'celery_task_publish_retry'},
        ),
        publish_retry_policy=Option(
            {'max_retries': 3,
             'interval_start': 0,
             'interval_max': 1,
             'interval_step': 0.2},
            type='dict', old={'celery_task_publish_retry_policy'},
        ),
        queues=Option(type='dict'),
        queue_max_priority=Option(None, type='int'),
        reject_on_worker_lost=Option(type='bool'),
        remote_tracebacks=Option(False, type='bool'),
        routes=Option(type='any'),
        send_sent_event=Option(
            False, type='bool', old={'celery_send_task_sent_event'},
        ),
        serializer=Option('json', old={'celery_task_serializer'}),
        soft_time_limit=Option(
            type='float', old={'celeryd_task_soft_time_limit'},
        ),
        time_limit=Option(
            type='float', old={'celeryd_task_time_limit'},
        ),
        store_errors_even_if_ignored=Option(False, type='bool'),
        track_started=Option(False, type='bool'),
        allow_error_cb_on_chord_header=Option(False, type='bool'),
    ),
    worker=Namespace(
        __old__=OLD_NS_WORKER,
        agent=Option(None, type='string'),
        autoscaler=Option('celery.worker.autoscale:Autoscaler'),
        cancel_long_running_tasks_on_connection_loss=Option(
            False, type='bool'
        ),
        concurrency=Option(None, type='int'),
        consumer=Option('celery.worker.consumer:Consumer', type='string'),
        direct=Option(False, type='bool', old={'celery_worker_direct'}),
        disable_rate_limits=Option(
            False, type='bool', old={'celery_disable_rate_limits'},
        ),
        deduplicate_successful_tasks=Option(
            False, type='bool'
        ),
        enable_remote_control=Option(
            True, type='bool', old={'celery_enable_remote_control'},
        ),
        hijack_root_logger=Option(True, type='bool'),
        log_color=Option(type='bool'),
        log_format=Option(DEFAULT_PROCESS_LOG_FMT),
        lost_wait=Option(10.0, type='float', old={'celeryd_worker_lost_wait'}),
        max_memory_per_child=Option(type='int'),
        max_tasks_per_child=Option(type='int'),
        pool=Option(DEFAULT_POOL),
        pool_putlocks=Option(True, type='bool'),
        pool_restarts=Option(False, type='bool'),
        proc_alive_timeout=Option(4.0, type='float'),
        prefetch_multiplier=Option(4, type='int'),
        redirect_stdouts=Option(
            True, type='bool', old={'celery_redirect_stdouts'},
        ),
        redirect_stdouts_level=Option(
            'WARNING', old={'celery_redirect_stdouts_level'},
        ),
        send_task_events=Option(
            False, type='bool', old={'celery_send_events'},
        ),
        state_db=Option(),
        task_log_format=Option(DEFAULT_TASK_LOG_FMT),
        timer=Option(type='string'),
        timer_precision=Option(1.0, type='float'),
    ),
)
def _flatten_keys(ns, key, opt):
return [(ns + key, opt)]
def _to_compat(ns, key, opt):
if opt.old:
return [
(oldkey.format(key).upper(), ns + key, opt)
for oldkey in opt.old
]
return [((ns + key).upper(), ns + key, opt)]
def flatten(d, root='', keyfilter=_flatten_keys):
    """Flatten settings.

    Walks the nested mapping *d* breadth-first.  Nested dicts extend the
    name prefix with ``<key>_``; every other value is handed to
    *keyfilter* together with the current prefix and key, and the items
    *keyfilter* returns are yielded.
    """
    pending = deque([(root, d)])
    while pending:
        prefix, mapping = pending.popleft()
        for name, value in mapping.items():
            if not isinstance(value, dict):
                yield from keyfilter(prefix, name, value)
                continue
            pending.append((prefix + name + '_', value))
# Flat mapping of every setting name to the default of its Option.
DEFAULTS = {
    key: opt.default for key, opt in flatten(NAMESPACES)
}
# Temporary list of (old_key, new_key, option) triples from ``_to_compat``.
__compat = list(flatten(NAMESPACES, keyfilter=_to_compat))
# Lookup tables derived from the triples above.
_OLD_DEFAULTS = {old_key: opt.default for old_key, _, opt in __compat}
_TO_OLD_KEY = {new_key: old_key for old_key, new_key, _ in __compat}
_TO_NEW_KEY = {old_key: new_key for old_key, new_key, _ in __compat}
# The triples are no longer needed once the tables are built.
__compat = None

# Sets of all new-style and old-style setting names.
SETTING_KEYS = set(DEFAULTS.keys())
_OLD_SETTING_KEYS = set(_TO_NEW_KEY.keys())
def find_deprecated_settings(source):  # pragma: no cover
    """Warn about every deprecated setting that *source* has set.

    A setting is reported when its option declares ``deprecate_by`` or
    ``remove_by`` and *source* has a truthy attribute of that name.
    Returns *source* unchanged.
    """
    from celery.utils import deprecated

    for name, opt in flatten(NAMESPACES):
        if not (opt.deprecate_by or opt.remove_by):
            continue
        if not getattr(source, name, None):
            continue
        deprecated.warn(
            description=f'The {name!r} setting',
            deprecation=opt.deprecate_by,
            removal=opt.remove_by,
            alternative=f'Use the {opt.alt} instead',
        )
    return source
@memoize(maxsize=None)
def find(name, namespace='celery'):
    """Find setting by name.

    Lookup order: the given *namespace*, then every entry of NAMESPACES
    (a match on a namespace name itself returns that namespace), and
    finally the flat DEFAULTS table.  Both names are compared
    lower-cased.

    Returns:
        searchresult: ``(namespace, key, value)`` for the first match.

    Raises:
        KeyError: If the name is not found anywhere.
    """
    namespace = namespace.lower()
    key = name.lower()
    try:
        # - Try specified name-space first.
        return searchresult(namespace, key, NAMESPACES[namespace][key])
    except KeyError:
        # - Try all the other namespaces.
        for ns, opts in NAMESPACES.items():
            if ns.lower() == key:
                return searchresult(None, ns, opts)
            if not isinstance(opts, dict):
                continue
            try:
                return searchresult(ns, key, opts[key])
            except KeyError:
                pass
        # - See if name is a qualname last.
        return searchresult(None, key, DEFAULTS[key])

View File

@@ -0,0 +1,40 @@
"""Implementation for the app.events shortcuts."""
from contextlib import contextmanager
from kombu.utils.objects import cached_property
class Events:
    """Implements app.events."""

    receiver_cls = 'celery.events.receiver:EventReceiver'
    dispatcher_cls = 'celery.events.dispatcher:EventDispatcher'
    state_cls = 'celery.events.state:State'

    def __init__(self, app=None):
        self.app = app

    @cached_property
    def Receiver(self):
        """Class built by ``app.subclass_with_self`` from ``receiver_cls``."""
        subclass = self.app.subclass_with_self
        return subclass(self.receiver_cls, reverse='events.Receiver')

    @cached_property
    def Dispatcher(self):
        """Class built by ``app.subclass_with_self`` from ``dispatcher_cls``."""
        subclass = self.app.subclass_with_self
        return subclass(self.dispatcher_cls, reverse='events.Dispatcher')

    @cached_property
    def State(self):
        """Class built by ``app.subclass_with_self`` from ``state_cls``."""
        subclass = self.app.subclass_with_self
        return subclass(self.state_cls, reverse='events.State')

    @contextmanager
    def default_dispatcher(self, hostname=None, enabled=True,
                           buffer_while_offline=False):
        """Yield a Dispatcher bound to a producer taken from the pool.

        The producer is acquired (blocking) from
        ``app.amqp.producer_pool``; both the producer and the dispatcher
        are released when the context exits.
        """
        # pylint: disable=too-many-function-args
        # This is a property pylint...
        with self.app.amqp.producer_pool.acquire(block=True) as prod, \
                self.Dispatcher(prod.connection, hostname, enabled,
                                prod.channel, buffer_while_offline) as d:
            yield d

View File

@@ -0,0 +1,247 @@
"""Logging configuration.
The Celery instances logging section: ``Celery.log``.
Sets up logging for the worker and other programs,
redirects standard outs, colors log output, patches logging
related compatibility fixes, and so on.
"""
import logging
import os
import sys
import warnings
from logging.handlers import WatchedFileHandler
from kombu.utils.encoding import set_default_encoding_file
from celery import signals
from celery._state import get_current_task
from celery.exceptions import CDeprecationWarning, CPendingDeprecationWarning
from celery.local import class_property
from celery.utils.log import (ColorFormatter, LoggingProxy, get_logger, get_multiprocessing_logger, mlevel,
reset_multiprocessing_logger)
from celery.utils.nodenames import node_format
from celery.utils.term import colored
# Names exported by ``from <module> import *``.
__all__ = ('TaskFormatter', 'Logging')

# Value of the ``MP_LOG`` environment variable (``False`` when unset).
# When truthy, the multiprocessing logger is configured with the requested
# log level instead of ``logging.ERROR``.
MP_LOG = os.environ.get('MP_LOG', False)
class TaskFormatter(ColorFormatter):
    """Formatter for tasks, adding the task name and id."""

    def format(self, record):
        """Format *record* after adding ``task_id``/``task_name`` to it.

        The values come from the current task when there is one with a
        request; otherwise missing fields default to ``'???'``.
        """
        task = get_current_task()
        fields = record.__dict__
        if not (task and task.request):
            fields.setdefault('task_name', '???')
            fields.setdefault('task_id', '???')
        else:
            fields.update(task_id=task.request.id, task_name=task.name)
        return super().format(record)
class Logging:
    """Application logging setup (app.log)."""

    #: The logging subsystem is only configured once per process.
    #: setup_logging_subsystem sets this flag, and subsequent calls
    #: will do nothing.
    _setup = False

    def __init__(self, app):
        """Store *app* and read the log formats/color flag from its conf."""
        self.app = app
        self.loglevel = mlevel(logging.WARN)
        self.format = self.app.conf.worker_log_format
        self.task_format = self.app.conf.worker_task_log_format
        self.colorize = self.app.conf.worker_log_color

    def setup(self, loglevel=None, logfile=None, redirect_stdouts=False,
              redirect_level='WARNING', colorize=None, hostname=None):
        """Configure logging and optionally redirect the standard streams.

        Exports the level and file through the ``CELERY_LOG_LEVEL`` and
        ``CELERY_LOG_FILE`` environment variables, makes the two Celery
        deprecation warning categories always visible and routes warnings
        through logging.

        Returns:
            The value returned by :meth:`setup_logging_subsystem`.
        """
        loglevel = mlevel(loglevel)
        handled = self.setup_logging_subsystem(
            loglevel, logfile, colorize=colorize, hostname=hostname,
        )
        # Streams are only redirected when setup_logging_subsystem
        # returned a falsy value.
        if not handled and redirect_stdouts:
            self.redirect_stdouts(redirect_level)
        os.environ.update(
            CELERY_LOG_LEVEL=str(loglevel) if loglevel else '',
            CELERY_LOG_FILE=str(logfile) if logfile else '',
        )
        warnings.filterwarnings('always', category=CDeprecationWarning)
        warnings.filterwarnings('always', category=CPendingDeprecationWarning)
        logging.captureWarnings(True)
        return handled

    def redirect_stdouts(self, loglevel=None, name='celery.redirected'):
        """Redirect stdout/stderr to the logger *name* and export that fact.

        Sets ``CELERY_LOG_REDIRECT`` and ``CELERY_LOG_REDIRECT_LEVEL`` in
        the environment.
        """
        self.redirect_stdouts_to_logger(
            get_logger(name), loglevel=loglevel
        )
        os.environ.update(
            CELERY_LOG_REDIRECT='1',
            CELERY_LOG_REDIRECT_LEVEL=str(loglevel or ''),
        )

    def setup_logging_subsystem(self, loglevel=None, logfile=None, format=None,
                                colorize=None, hostname=None, **kwargs):
        """Configure the root, multiprocessing and task loggers once.

        Does nothing (returns ``None``) when logging was already set up.
        Otherwise the ``setup_logging`` signal is sent first; only when it
        reports no receivers does this method configure the loggers
        itself.

        Returns:
            The result of sending the ``setup_logging`` signal.
        """
        if self.already_setup:
            return
        if logfile and hostname:
            logfile = node_format(logfile, hostname)
        # Set on the class, so the flag is shared by all instances.
        Logging._setup = True
        loglevel = mlevel(loglevel or self.loglevel)
        format = format or self.format
        colorize = self.supports_color(colorize, logfile)
        reset_multiprocessing_logger()
        receivers = signals.setup_logging.send(
            sender=None, loglevel=loglevel, logfile=logfile,
            format=format, colorize=colorize,
        )

        if not receivers:
            root = logging.getLogger()

            if self.app.conf.worker_hijack_root_logger:
                root.handlers = []
                get_logger('celery').handlers = []
                get_logger('celery.task').handlers = []
                get_logger('celery.redirected').handlers = []

            # Configure root logger
            self._configure_logger(
                root, logfile, loglevel, format, colorize, **kwargs
            )

            # Configure the multiprocessing logger
            self._configure_logger(
                get_multiprocessing_logger(),
                logfile, loglevel if MP_LOG else logging.ERROR,
                format, colorize, **kwargs
            )

            signals.after_setup_logger.send(
                sender=None, logger=root,
                loglevel=loglevel, logfile=logfile,
                format=format, colorize=colorize,
            )

            # then setup the root task logger.
            self.setup_task_loggers(loglevel, logfile, colorize=colorize)

        try:
            stream = logging.getLogger().handlers[0].stream
        except (AttributeError, IndexError):
            # No root handler, or the first handler has no stream.
            pass
        else:
            set_default_encoding_file(stream)

        # This is a hack for multiprocessing's fork+exec, so that
        # logging before Process.run works.
        logfile_name = logfile if isinstance(logfile, str) else ''
        os.environ.update(_MP_FORK_LOGLEVEL_=str(loglevel),
                          _MP_FORK_LOGFILE_=logfile_name,
                          _MP_FORK_LOGFORMAT_=format)
        return receivers

    def _configure_logger(self, logger, logfile, loglevel,
                          format, colorize, **kwargs):
        """Add handlers to *logger* and set its level; no-op for ``None``."""
        if logger is not None:
            self.setup_handlers(logger, logfile, format,
                                colorize, **kwargs)
            if loglevel:
                logger.setLevel(loglevel)

    def setup_task_loggers(self, loglevel=None, logfile=None, format=None,
                           colorize=None, propagate=False, **kwargs):
        """Setup the task logger.

        If `logfile` is not specified, then `sys.stderr` is used.

        Will return the base task logger object.
        """
        loglevel = mlevel(loglevel or self.loglevel)
        format = format or self.task_format
        colorize = self.supports_color(colorize, logfile)

        logger = self.setup_handlers(
            get_logger('celery.task'),
            logfile, format, colorize,
            formatter=TaskFormatter, **kwargs
        )
        logger.setLevel(loglevel)
        # this is an int for some reason, better to not question why.
        logger.propagate = int(propagate)
        signals.after_setup_task_logger.send(
            sender=None, logger=logger,
            loglevel=loglevel, logfile=logfile,
            format=format, colorize=colorize,
        )
        return logger

    def redirect_stdouts_to_logger(self, logger, loglevel=None,
                                   stdout=True, stderr=True):
        """Redirect :class:`sys.stdout` and :class:`sys.stderr` to logger.

        Arguments:
            logger (logging.Logger): Logger instance to redirect to.
            loglevel (int, str): The loglevel redirected message
                will be logged as.
            stdout (bool): Replace ``sys.stdout`` with the proxy.
            stderr (bool): Replace ``sys.stderr`` with the proxy.

        Returns:
            LoggingProxy: The proxy object that was installed.
        """
        proxy = LoggingProxy(logger, loglevel)
        if stdout:
            sys.stdout = proxy
        if stderr:
            sys.stderr = proxy
        return proxy

    def supports_color(self, colorize=None, logfile=None):
        """Decide whether colored output should be used.

        *colorize* falls back to the configured ``self.colorize`` when
        ``None``.
        """
        colorize = self.colorize if colorize is None else colorize
        if self.app.IS_WINDOWS:
            # Windows does not support ANSI color codes.
            return False
        if colorize or colorize is None:
            # Only use color if there's no active log file
            # and stderr is an actual terminal.
            return logfile is None and sys.stderr.isatty()
        return colorize

    def colored(self, logfile=None, enabled=None):
        """Return ``colored(...)`` enabled according to :meth:`supports_color`."""
        return colored(enabled=self.supports_color(enabled, logfile))

    def setup_handlers(self, logger, logfile, format, colorize,
                       formatter=ColorFormatter, **kwargs):
        """Attach a formatted handler to *logger* unless already configured.

        Returns:
            The *logger* that was passed in.
        """
        if self._is_configured(logger):
            return logger
        handler = self._detect_handler(logfile)
        handler.setFormatter(formatter(format, use_color=colorize))
        logger.addHandler(handler)
        return logger

    def _detect_handler(self, logfile=None):
        """Create handler from filename, an open stream or `None` (stderr)."""
        logfile = sys.__stderr__ if logfile is None else logfile
        # Anything with a ``write`` attribute is treated as a stream.
        if hasattr(logfile, 'write'):
            return logging.StreamHandler(logfile)
        return WatchedFileHandler(logfile, encoding='utf-8')

    def _has_handler(self, logger):
        """Return true if *logger* has a handler that is not a NullHandler."""
        return any(
            not isinstance(h, logging.NullHandler)
            for h in logger.handlers or []
        )

    def _is_configured(self, logger):
        """Return true if *logger* has a real handler and is not flagged
        with a truthy ``_rudimentary_setup`` attribute."""
        return self._has_handler(logger) and not getattr(
            logger, '_rudimentary_setup', False)

    def get_default_logger(self, name='celery', **kwargs):
        """Return ``get_logger(name)``; extra keyword arguments are ignored."""
        return get_logger(name)

    @class_property
    def already_setup(self):
        """Whether the logging subsystem was set up (the ``_setup`` flag)."""
        return self._setup

    @already_setup.setter
    def already_setup(self, was_setup):
        self._setup = was_setup

View File

@@ -0,0 +1,68 @@
"""Registry of available tasks."""
import inspect
from importlib import import_module
from celery._state import get_current_app
from celery.app.autoretry import add_autoretry_behaviour
from celery.exceptions import InvalidTaskError, NotRegistered
__all__ = ('TaskRegistry',)
class TaskRegistry(dict):
    """Map of registered tasks."""

    NotRegistered = NotRegistered

    def __missing__(self, key):
        # Turn failed ``registry[name]`` lookups into NotRegistered.
        raise self.NotRegistered(key)

    def register(self, task):
        """Register a task in the task registry.

        The task will be automatically instantiated if not already an
        instance. Name must be configured prior to registration.

        Raises:
            celery.exceptions.InvalidTaskError: if ``task.name`` is None.
        """
        is_class = inspect.isclass(task)
        if task.name is None:
            # Report the task class itself; for a class argument
            # ``type(task)`` would be its metaclass.
            task_cls = task if is_class else type(task)
            raise InvalidTaskError(
                'Task class {!r} must specify .name attribute'.format(
                    task_cls.__name__))
        # A conditional expression (not ``and/or``) so that a falsy
        # instance can never be replaced by its class.
        task = task() if is_class else task
        add_autoretry_behaviour(task)
        self[task.name] = task

    def unregister(self, name):
        """Unregister task by name.

        Arguments:
            name (str): name of the task to unregister, or a
                :class:`celery.app.task.Task` with a valid `name` attribute.

        Raises:
            celery.exceptions.NotRegistered: if the task is not registered.
        """
        try:
            self.pop(getattr(name, 'name', name))
        except KeyError:
            raise self.NotRegistered(name)

    # -- legacy helpers kept for backwards compatibility
    #    (an earlier comment announced their removal in 4.0).
    def regular(self):
        """Return the tasks whose ``type`` is ``'regular'``."""
        return self.filter_types('regular')

    def periodic(self):
        """Return the tasks whose ``type`` is ``'periodic'``."""
        return self.filter_types('periodic')

    def filter_types(self, type):
        """Return a dict of the tasks whose ``type`` attribute equals *type*.

        Tasks without a ``type`` attribute count as ``'regular'``.
        """
        return {name: task for name, task in self.items()
                if getattr(task, 'type', 'regular') == type}
def _unpickle_task(name):
    """Return the task called *name* from the current app's task registry."""
    registry = get_current_app().tasks
    return registry[name]
def _unpickle_task_v2(name, module=None):
    """Return the task called *name* from the current app's task registry.

    When *module* is given it is imported before the registry lookup.
    """
    needs_import = bool(module)
    if needs_import:
        import_module(module)
    registry = get_current_app().tasks
    return registry[name]

View File

@@ -0,0 +1,136 @@
"""Task Routing.
Contains utilities for working with task routers, (:setting:`task_routes`).
"""
import fnmatch
import re
from collections import OrderedDict
from collections.abc import Mapping
from kombu import Queue
from celery.exceptions import QueueNotFound
from celery.utils.collections import lpmerge
from celery.utils.functional import maybe_evaluate, mlazy
from celery.utils.imports import symbol_by_name
# ``Pattern`` is the type of compiled regular expressions; MapRoute uses it
# to recognise route keys that are already compiled patterns.
try:
    Pattern = re._pattern_type
except AttributeError:  # pragma: no cover
    # The private alias is missing (the case on Python 3.7 and later):
    # fall back to the public ``re.Pattern``.
    Pattern = re.Pattern
__all__ = ('MapRoute', 'Router', 'prepare')
class MapRoute:
    """Creates a router out of a :class:`dict`."""

    def __init__(self, map):
        """Split the routes into exact names and patterns.

        *map* is a mapping or an iterable of ``(key, route)`` pairs.
        Compiled-pattern keys and keys containing ``*`` (translated with
        :func:`fnmatch.translate`) become patterns, kept in insertion
        order; all other keys are exact task names.
        """
        pairs = map.items() if isinstance(map, Mapping) else map
        self.map = {}
        self.patterns = OrderedDict()
        for key, target in pairs:
            if isinstance(key, Pattern):
                self.patterns[key] = target
                continue
            if '*' in key:
                compiled = re.compile(fnmatch.translate(key))
                self.patterns[compiled] = target
                continue
            self.map[key] = target

    def __call__(self, name, *args, **kwargs):
        """Return the route dict for task *name*, or ``None`` if none matches.

        Exact names are tried before patterns.  A route that cannot be
        converted with ``dict()`` is wrapped as ``{'queue': route}``.
        """
        try:
            return dict(self.map[name])
        except ValueError:
            return {'queue': self.map[name]}
        except KeyError:
            pass
        for pattern, target in self.patterns.items():
            if not pattern.match(name):
                continue
            try:
                return dict(target)
            except ValueError:
                return {'queue': target}
        return None
class Router:
    """Route tasks based on the :setting:`task_routes` setting."""

    def __init__(self, routes=None, queues=None,
                 create_missing=False, app=None):
        self.app = app
        self.queues = {} if queues is None else queues
        self.routes = [] if routes is None else routes
        self.create_missing = create_missing

    def route(self, options, name, args=(), kwargs=None, task_type=None):
        """Return the final routing options for task *name*.

        A route found by the configured routers is merged with *options*;
        otherwise, when *options* names no queue, the app's
        ``task_default_queue`` is merged in.
        """
        kwargs = kwargs or {}
        options = self.expand_destination(options)  # expands 'queue'
        matched = None
        if self.routes:
            matched = self.lookup_route(
                name, args, kwargs, options, task_type)
        if matched:  # expands 'queue' in route.
            return lpmerge(self.expand_destination(matched), options)
        if 'queue' in options:
            return options
        fallback = self.expand_destination(self.app.conf.task_default_queue)
        return lpmerge(fallback, options)

    def expand_destination(self, route):
        """Replace the queue name in *route* by the configured queue.

        *route* may be a plain queue name or a dict (modified in place).

        Raises:
            QueueNotFound: If the queue name is missing from ``self.queues``.
        """
        if isinstance(route, str):
            # Route can be a queue name: convenient for direct exchanges.
            queue, route = route, {}
        else:
            # can use defaults from configured queue, but override specific
            # things (like the routing_key): great for topic exchanges.
            queue = route.pop('queue', None)

        if not queue:
            return route
        if isinstance(queue, Queue):
            route['queue'] = queue
            return route
        try:
            route['queue'] = self.queues[queue]
        except KeyError:
            raise QueueNotFound(
                f'Queue {queue!r} missing from task_queues')
        return route

    def lookup_route(self, name,
                     args=None, kwargs=None, options=None, task_type=None):
        """Return the first non-``None`` answer of the routers, else ``None``."""
        ask = self.query_router
        answers = (
            ask(router, name, args, kwargs, options, task_type)
            for router in self.routes
        )
        return next((answer for answer in answers if answer is not None),
                    None)

    def query_router(self, router, task, args, kwargs, options, task_type):
        """Ask a single *router* for the route of *task*."""
        resolved = maybe_evaluate(router)
        if not hasattr(resolved, 'route_for_task'):
            return resolved(task, args, kwargs, options, task=task_type)
        # pre 4.0 router class
        return resolved.route_for_task(task, args, kwargs)
def expand_router_string(router):
    """Resolve the router named by *router* via ``symbol_by_name``.

    Objects exposing ``route_for_task`` are called (instantiated) before
    being returned; anything else is returned as resolved.
    """
    resolved = symbol_by_name(router)
    # need to instantiate pre 4.0 router classes
    return resolved() if hasattr(resolved, 'route_for_task') else resolved
def prepare(routes):
    """Expand the :setting:`task_routes` setting.

    Returns an empty tuple for ``None``.  Otherwise returns a list with
    one router per entry (a single non-list value counts as one entry):
    mappings, lists and tuples become a :class:`MapRoute`, strings become
    a lazily resolved router, anything else is used as-is.
    """
    if routes is None:
        return ()

    def _expand(entry):
        if isinstance(entry, (Mapping, list, tuple)):
            return MapRoute(entry)
        if isinstance(entry, str):
            return mlazy(expand_router_string, entry)
        return entry

    entries = routes if isinstance(routes, (list, tuple)) else (routes,)
    return list(map(_expand, entries))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,763 @@
"""Trace task execution.
This module defines how the task execution is traced:
errors are recorded, handlers are applied and so on.
"""
import logging
import os
import sys
import time
from collections import namedtuple
from typing import Any, Callable, Dict, FrozenSet, Optional, Sequence, Tuple, Type, Union
from warnings import warn
from billiard.einfo import ExceptionInfo, ExceptionWithTraceback
from kombu.exceptions import EncodeError
from kombu.serialization import loads as loads_message
from kombu.serialization import prepare_accept_content
from kombu.utils.encoding import safe_repr, safe_str
import celery
import celery.loaders.app
from celery import current_app, group, signals, states
from celery._state import _task_stack
from celery.app.task import Context
from celery.app.task import Task as BaseTask
from celery.exceptions import BackendGetMetaError, Ignore, InvalidTaskError, Reject, Retry
from celery.result import AsyncResult
from celery.utils.log import get_logger
from celery.utils.nodenames import gethostname
from celery.utils.objects import mro_lookup
from celery.utils.saferepr import saferepr
from celery.utils.serialization import get_pickleable_etype, get_pickleable_exception, get_pickled_exception
# ## ---
# This is the heart of the worker, the inner loop so to speak.
# It used to be split up into nice little classes and methods,
# but in the end it only resulted in bad performance and horrible tracebacks,
# so instead we now use one closure per task class.
# pylint: disable=redefined-outer-name
# We cache globals and attribute lookups, so disable this warning.
# pylint: disable=broad-except
# We know what we're doing...
__all__ = (
'TraceInfo', 'build_tracer', 'trace_task',
'setup_worker_optimizations', 'reset_worker_optimizations',
)
from celery.worker.state import successful_requests
logger = get_logger(__name__)
#: Format string used to log task receipt.
LOG_RECEIVED = """\
Task %(name)s[%(id)s] received\
"""
#: Format string used to log task success.
LOG_SUCCESS = """\
Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s\
"""
#: Format string used to log task failure.
LOG_FAILURE = """\
Task %(name)s[%(id)s] %(description)s: %(exc)s\
"""
#: Format string used to log task internal error.
LOG_INTERNAL_ERROR = """\
Task %(name)s[%(id)s] %(description)s: %(exc)s\
"""
#: Format string used to log task ignored.
LOG_IGNORED = """\
Task %(name)s[%(id)s] %(description)s\
"""
#: Format string used to log task rejected.
LOG_REJECTED = """\
Task %(name)s[%(id)s] %(exc)s\
"""
#: Format string used to log task retry.
LOG_RETRY = """\
Task %(name)s[%(id)s] retry: %(exc)s\
"""
#: How one kind of task outcome is logged: ``format`` is the message
#: template, ``description`` fills its ``%(description)s`` slot,
#: ``severity`` is the logging level, and a truthy ``traceback`` attaches
#: exc_info to the log record (see ``TraceInfo._log_error``).
#: NOTE(review): ``mail`` is not read in this part of the file -- confirm
#: whether it is still used.
log_policy_t = namedtuple(
    'log_policy_t',
    ('format', 'description', 'severity', 'traceback', 'mail'),
)

# One policy per outcome; ``get_log_policy`` picks among them.
log_policy_reject = log_policy_t(LOG_REJECTED, 'rejected', logging.WARN, 1, 1)
log_policy_ignore = log_policy_t(LOG_IGNORED, 'ignored', logging.INFO, 0, 0)
log_policy_internal = log_policy_t(
    LOG_INTERNAL_ERROR, 'INTERNAL ERROR', logging.CRITICAL, 1, 1,
)
log_policy_expected = log_policy_t(
    LOG_FAILURE, 'raised expected', logging.INFO, 0, 0,
)
log_policy_unexpected = log_policy_t(
    LOG_FAILURE, 'raised unexpected', logging.ERROR, 1, 1,
)

# Signal ``send`` methods and task state constants bound to module-level
# names once at import time.
send_prerun = signals.task_prerun.send
send_postrun = signals.task_postrun.send
send_success = signals.task_success.send
STARTED = states.STARTED
SUCCESS = states.SUCCESS
IGNORED = states.IGNORED
REJECTED = states.REJECTED
RETRY = states.RETRY
FAILURE = states.FAILURE
EXCEPTION_STATES = states.EXCEPTION_STATES
IGNORE_STATES = frozenset({IGNORED, RETRY, REJECTED})

#: set by :func:`setup_worker_optimizations`
_localized = []
_patched = {}

# NOTE(review): the code producing ``trace_ok_t`` values is outside this
# part of the file.
trace_ok_t = namedtuple('trace_ok_t', ('retval', 'info', 'runtime', 'retstr'))
def info(fmt, context):
    """Log ``fmt % context`` at INFO level on the module logger.

    *context* is additionally attached to the record as ``extra['data']``
    so custom handlers can read it.
    """
    extra = {'data': context}
    logger.info(fmt, context, extra=extra)
def task_has_custom(task, attr):
    """Return true if the task overrides ``attr``.

    The lookup walks the class of *task* with ``mro_lookup`` and stops at
    ``BaseTask`` and ``object``.
    """
    task_cls = task.__class__
    stop_at = {BaseTask, object}
    return mro_lookup(task_cls, attr, stop=stop_at,
                      monkey_patched=['celery.app.task'])
def get_log_policy(task, einfo, exc):
    """Pick the log policy for exception *exc* raised by *task*.

    Checked in order: ``Reject``, ``Ignore``, internal errors
    (``einfo.internal``), then exceptions listed in ``task.throws``
    (expected); everything else is unexpected.
    """
    if isinstance(exc, Reject):
        return log_policy_reject
    if isinstance(exc, Ignore):
        return log_policy_ignore
    if einfo.internal:
        return log_policy_internal
    expected = task.throws and isinstance(exc, task.throws)
    return log_policy_expected if expected else log_policy_unexpected
def get_task_name(request, default):
    """Return the request's ``shadow`` name, or *default* when unset."""
    shadow = getattr(request, 'shadow', None)
    # shadow may be missing, None or an empty string: use default then.
    return shadow if shadow else default
class TraceInfo:
    """Information about task execution."""

    # ``state`` selects the handler in handle_error_state; ``retval``
    # holds the exception that the handlers process.
    __slots__ = ('state', 'retval')

    def __init__(self, state, retval=None):
        self.state = state
        self.retval = retval

    def handle_error_state(self, task, req,
                           eager=False, call_errbacks=True):
        """Dispatch to the retry or failure handler according to ``state``.

        Decides first whether the error should be stored, then calls
        the handler.

        Raises:
            KeyError: If ``self.state`` is neither RETRY nor FAILURE.
        """
        if task.ignore_result:
            store_errors = task.store_errors_even_if_ignored
        elif eager and task.store_eager_result:
            store_errors = True
        else:
            store_errors = not eager

        return {
            RETRY: self.handle_retry,
            FAILURE: self.handle_failure,
        }[self.state](task, req,
                      store_errors=store_errors,
                      call_errbacks=call_errbacks)

    def handle_reject(self, task, req, **kwargs):
        """Log the exception currently being handled as a rejection."""
        self._log_error(task, req, ExceptionInfo())

    def handle_ignore(self, task, req, **kwargs):
        """Log the exception currently being handled as ignored."""
        self._log_error(task, req, ExceptionInfo())

    def handle_retry(self, task, req, store_errors=True, **kwargs):
        """Handle retry exception."""
        # the exception raised is the Retry semi-predicate,
        # and its ``exc`` attribute is the original exception raised (if any).
        type_, _, tb = sys.exc_info()
        try:
            reason = self.retval
            einfo = ExceptionInfo((type_, reason, tb))
            if store_errors:
                task.backend.mark_as_retry(
                    req.id, reason.exc, einfo.traceback, request=req,
                )
            task.on_retry(reason.exc, req.id, req.args, req.kwargs, einfo)
            signals.task_retry.send(sender=task, request=req,
                                    reason=reason, einfo=einfo)
            info(LOG_RETRY, {
                'id': req.id,
                'name': get_task_name(req, task.name),
                'exc': str(reason),
            })
            return einfo
        finally:
            # drop the local reference to the traceback
            del tb

    def handle_failure(self, task, req, store_errors=True, call_errbacks=True):
        """Handle exception."""
        orig_exc = self.retval

        exc = get_pickleable_exception(orig_exc)
        if exc.__traceback__ is None:
            # `get_pickleable_exception` may have created a new exception without
            # a traceback.
            _, _, exc.__traceback__ = sys.exc_info()

        exc_type = get_pickleable_etype(type(orig_exc))

        # make sure we only send pickleable exceptions back to parent.
        einfo = ExceptionInfo(exc_info=(exc_type, exc, exc.__traceback__))

        task.backend.mark_as_failure(
            req.id, exc, einfo.traceback,
            request=req, store_result=store_errors,
            call_errbacks=call_errbacks,
        )

        task.on_failure(exc, req.id, req.args, req.kwargs, einfo)
        signals.task_failure.send(sender=task, task_id=req.id,
                                  exception=exc, args=req.args,
                                  kwargs=req.kwargs,
                                  traceback=exc.__traceback__,
                                  einfo=einfo)
        self._log_error(task, req, einfo)
        return einfo

    def _log_error(self, task, req, einfo):
        """Log *einfo* using the policy chosen by ``get_log_policy``."""
        eobj = einfo.exception = get_pickled_exception(einfo.exception)
        if isinstance(eobj, ExceptionWithTraceback):
            # unwrap to the exception carried in ``.exc``
            eobj = einfo.exception = eobj.exc
        exception, traceback, exc_info, sargs, skwargs = (
            safe_repr(eobj),
            safe_str(einfo.traceback),
            einfo.exc_info,
            req.get('argsrepr') or safe_repr(req.args),
            req.get('kwargsrepr') or safe_repr(req.kwargs),
        )
        policy = get_log_policy(task, einfo, eobj)

        context = {
            'hostname': req.hostname,
            'id': req.id,
            'name': get_task_name(req, task.name),
            'exc': exception,
            'traceback': traceback,
            'args': sargs,
            'kwargs': skwargs,
            'description': policy.description,
            'internal': einfo.internal,
        }

        # exc_info is only attached when the policy asks for a traceback.
        logger.log(policy.severity, policy.format.strip(), context,
                   exc_info=exc_info if policy.traceback else None,
                   extra={'data': context})
def traceback_clear(exc=None):
    """Clear the frames referenced by an exception's traceback.

    Uses ``exc.__traceback__`` when *exc* is given and has that attribute;
    otherwise falls back to the traceback of the exception currently being
    handled.  Frames that are still executing are left alone.
    """
    # Cleared Tb, but einfo still has a reference to Traceback.
    # exc cleans up the Traceback at the last moment that can be revealed.
    if exc is not None and hasattr(exc, '__traceback__'):
        tb = exc.__traceback__
    else:
        tb = sys.exc_info()[2]

    while tb is not None:
        frame = tb.tb_frame
        try:
            frame.clear()
            frame.f_locals
        except RuntimeError:
            # Ignore the exception raised if the frame is still executing.
            pass
        tb = tb.tb_next
def build_tracer(
        name: str,
        task: Union[celery.Task, celery.local.PromiseProxy],
        loader: Optional[celery.loaders.app.AppLoader] = None,
        hostname: Optional[str] = None,
        store_errors: bool = True,
        Info: Type[TraceInfo] = TraceInfo,
        eager: bool = False,
        propagate: bool = False,
        app: Optional[celery.Celery] = None,
        monotonic: Callable[[], int] = time.monotonic,
        trace_ok_t: Type[trace_ok_t] = trace_ok_t,
        IGNORE_STATES: FrozenSet[str] = IGNORE_STATES) -> \
        Callable[[str, Tuple[Any, ...], Dict[str, Any], Any], trace_ok_t]:
    """Return a function that traces task execution.

    Catches all exceptions and updates result backend with the
    state and result.

    If the call was successful, it saves the result to the task result
    backend, and sets the task status to `"SUCCESS"`.

    If the call raises :exc:`~@Retry`, it extracts
    the original exception, uses that as the result and sets the task state
    to `"RETRY"`.

    If the call results in an exception, it saves the exception as the task
    result, and sets the task state to `"FAILURE"`.

    Most task/app attributes used by the returned function are looked up
    once here and captured as closure variables.

    Return a function that takes the following arguments:

        :param uuid: The id of the task.
        :param args: List of positional args to pass on to the function.
        :param kwargs: Keyword arguments mapping to pass on to the function.
        :keyword request: Request dict.

    """

    # pylint: disable=too-many-statements

    # If the task doesn't define a custom __call__ method
    # we optimize it away by simply calling the run method directly,
    # saving the extra method call and a line less in the stack trace.
    fun = task if task_has_custom(task, '__call__') else task.run

    loader = loader or app.loader
    ignore_result = task.ignore_result
    # NOTE: this first assignment is immediately overwritten by the next line.
    track_started = task.track_started
    track_started = not eager and (task.track_started and not ignore_result)

    # #6476
    # Eager runs only publish results when the task opts in via
    # ``store_eager_result``; non-eager runs publish unless ignored.
    if eager and not ignore_result and task.store_eager_result:
        publish_result = True
    else:
        publish_result = not eager and not ignore_result

    # Deduplication needs late acks, the worker setting, and a persistent
    # result backend all to be enabled.
    deduplicate_successful_tasks = ((app.conf.task_acks_late or task.acks_late)
                                    and app.conf.worker_deduplicate_successful_tasks
                                    and app.backend.persistent)

    hostname = hostname or gethostname()
    inherit_parent_priority = app.conf.task_inherit_parent_priority

    loader_task_init = loader.on_task_init
    loader_cleanup = loader.on_process_cleanup

    # The optional hooks stay ``None`` unless the task overrides them, so
    # the hot path can skip the call entirely.
    task_before_start = None
    task_on_success = None
    task_after_return = None
    if task_has_custom(task, 'before_start'):
        task_before_start = task.before_start
    if task_has_custom(task, 'on_success'):
        task_on_success = task.on_success
    if task_has_custom(task, 'after_return'):
        task_after_return = task.after_return

    pid = os.getpid()

    request_stack = task.request_stack
    push_request = request_stack.push
    pop_request = request_stack.pop
    push_task = _task_stack.push
    pop_task = _task_stack.pop
    _does_info = logger.isEnabledFor(logging.INFO)
    resultrepr_maxsize = task.resultrepr_maxsize

    prerun_receivers = signals.task_prerun.receivers
    postrun_receivers = signals.task_postrun.receivers
    success_receivers = signals.task_success.receivers

    from celery import canvas
    signature = canvas.maybe_signature  # maybe_ does not clone if already

    def on_error(
            request: celery.app.task.Context,
            exc: Union[Exception, Type[Exception]],
            state: str = FAILURE,
            call_errbacks: bool = True) -> Tuple[Info, Any, Any, Any]:
        """Handle any errors raised by a `Task`'s execution."""
        if propagate:
            # Bare raise: re-raises the exception being handled by the
            # caller's ``except`` block (every call site is inside one).
            raise
        I = Info(state, exc)
        R = I.handle_error_state(
            task, request, eager=eager, call_errbacks=call_errbacks,
        )
        return I, R, I.state, I.retval

    def trace_task(
            uuid: str,
            args: Sequence[Any],
            kwargs: Dict[str, Any],
            request: Optional[Dict[str, Any]] = None) -> trace_ok_t:
        """Execute and trace a `Task`."""

        # R      - is the possibly prepared return value.
        # I      - is the Info object.
        # T      - runtime
        # Rstr   - textual representation of return value
        # retval - is the always unmodified return value.
        # state  - is the resulting task state.

        # This function is very long because we've unrolled all the calls
        # for performance reasons, and because the function is so long
        # we want the main variables (I, and R) to stand out visually from the
        # the rest of the variables, so breaking PEP8 is worth it ;)
        R = I = T = Rstr = retval = state = None
        task_request = None
        time_start = monotonic()
        try:
            try:
                # Attribute access only: fails for non-mapping kwargs.
                kwargs.items
            except AttributeError:
                raise InvalidTaskError(
                    'Task keyword arguments is not a mapping')

            task_request = Context(request or {}, args=args,
                                   called_directly=False, kwargs=kwargs)

            redelivered = (task_request.delivery_info
                           and task_request.delivery_info.get('redelivered', False))
            if deduplicate_successful_tasks and redelivered:
                # Skip a redelivered task that is already known to have
                # succeeded, first via the local set, then via the backend.
                if task_request.id in successful_requests:
                    return trace_ok_t(R, I, T, Rstr)
                r = AsyncResult(task_request.id, app=app)

                try:
                    state = r.state
                except BackendGetMetaError:
                    # State could not be read: fall through and run the task.
                    pass
                else:
                    if state == SUCCESS:
                        info(LOG_IGNORED, {
                            'id': task_request.id,
                            'name': get_task_name(task_request, name),
                            'description': 'Task already completed successfully.'
                        })
                        return trace_ok_t(R, I, T, Rstr)

            push_task(task)
            root_id = task_request.root_id or uuid
            task_priority = task_request.delivery_info.get('priority') if \
                inherit_parent_priority else None
            push_request(task_request)
            try:
                # -*- PRE -*-
                if prerun_receivers:
                    send_prerun(sender=task, task_id=uuid, task=task,
                                args=args, kwargs=kwargs)
                loader_task_init(uuid, task)
                if track_started:
                    task.backend.store_result(
                        uuid, {'pid': pid, 'hostname': hostname}, STARTED,
                        request=task_request,
                    )

                # -*- TRACE -*-
                try:
                    if task_before_start:
                        task_before_start(uuid, args, kwargs)

                    R = retval = fun(*args, **kwargs)
                    state = SUCCESS
                except Reject as exc:
                    I, R = Info(REJECTED, exc), ExceptionInfo(internal=True)
                    state, retval = I.state, I.retval
                    I.handle_reject(task, task_request)
                    traceback_clear(exc)
                except Ignore as exc:
                    I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
                    state, retval = I.state, I.retval
                    I.handle_ignore(task, task_request)
                    traceback_clear(exc)
                except Retry as exc:
                    I, R, state, retval = on_error(
                        task_request, exc, RETRY, call_errbacks=False)
                    traceback_clear(exc)
                except Exception as exc:
                    I, R, state, retval = on_error(task_request, exc)
                    traceback_clear(exc)
                except BaseException:
                    # Non-Exception BaseExceptions are not handled here.
                    raise
                else:
                    try:
                        # callback tasks must be applied before the result is
                        # stored, so that result.children is populated.

                        # groups are called inline and will store trail
                        # separately, so need to call them separately
                        # so that the trail's not added multiple times :(
                        # (Issue #1936)
                        callbacks = task.request.callbacks
                        if callbacks:
                            if len(task.request.callbacks) > 1:
                                sigs, groups = [], []
                                for sig in callbacks:
                                    sig = signature(sig, app=app)
                                    if isinstance(sig, group):
                                        groups.append(sig)
                                    else:
                                        sigs.append(sig)
                                for group_ in groups:
                                    group_.apply_async(
                                        (retval,),
                                        parent_id=uuid, root_id=root_id,
                                        priority=task_priority
                                    )
                                if sigs:
                                    group(sigs, app=app).apply_async(
                                        (retval,),
                                        parent_id=uuid, root_id=root_id,
                                        priority=task_priority
                                    )
                            else:
                                signature(callbacks[0], app=app).apply_async(
                                    (retval,), parent_id=uuid, root_id=root_id,
                                    priority=task_priority
                                )

                        # execute first task in chain
                        chain = task_request.chain
                        if chain:
                            _chsig = signature(chain.pop(), app=app)
                            _chsig.apply_async(
                                (retval,), chain=chain,
                                parent_id=uuid, root_id=root_id,
                                priority=task_priority
                            )
                        task.backend.mark_as_done(
                            uuid, retval, task_request, publish_result,
                        )
                    except EncodeError as exc:
                        # An EncodeError here is reported as a task failure.
                        I, R, state, retval = on_error(task_request, exc)
                    else:
                        Rstr = saferepr(R, resultrepr_maxsize)
                        T = monotonic() - time_start
                        if task_on_success:
                            task_on_success(retval, uuid, args, kwargs)
                        if success_receivers:
                            send_success(sender=task, result=retval)
                        if _does_info:
                            info(LOG_SUCCESS, {
                                'id': uuid,
                                'name': get_task_name(task_request, name),
                                'return_value': Rstr,
                                'runtime': T,
                                'args': task_request.get('argsrepr') or safe_repr(args),
                                'kwargs': task_request.get('kwargsrepr') or safe_repr(kwargs),
                            })

                # -* POST *-
                if state not in IGNORE_STATES:
                    if task_after_return:
                        task_after_return(
                            state, retval, uuid, args, kwargs, None,
                        )
            finally:
                try:
                    if postrun_receivers:
                        send_postrun(sender=task, task_id=uuid, task=task,
                                     args=args, kwargs=kwargs,
                                     retval=retval, state=state)
                finally:
                    # Always unwind the task/request stacks, even if a
                    # postrun receiver raised.
                    pop_task()
                    pop_request()
                    if not eager:
                        try:
                            task.backend.process_cleanup()
                            loader_cleanup()
                        except (KeyboardInterrupt, SystemExit, MemoryError):
                            raise
                        except Exception as exc:
                            logger.error('Process cleanup failed: %r', exc,
                                         exc_info=True)
        except MemoryError:
            raise
        except Exception as exc:
            # Anything that escaped the inner handling is an error outside
            # the task body: signal it, then report it (or re-raise if eager).
            _signal_internal_error(task, uuid, args, kwargs, request, exc)
            if eager:
                raise
            R = report_internal_error(task, exc)
            if task_request is not None:
                I, _, _, _ = on_error(task_request, exc)
        return trace_ok_t(R, I, T, Rstr)

    return trace_task
def trace_task(task, uuid, args, kwargs, request=None, **opts):
    """Run *task* through its tracer, building the tracer on first use.

    Any exception escaping the tracer is signalled as an internal error
    and converted into a ``trace_ok_t`` carrying a FAILURE ``TraceInfo``.
    """
    request = request or {}
    try:
        if task.__trace__ is None:
            # Lazily build and cache the tracer on the task.
            task.__trace__ = build_tracer(task.name, task, **opts)
        tracer = task.__trace__
        return tracer(uuid, args, kwargs, request)
    except Exception as exc:
        _signal_internal_error(task, uuid, args, kwargs, request, exc)
        einfo = report_internal_error(task, exc)
        return trace_ok_t(einfo, TraceInfo(FAILURE, exc), 0.0, None)
def _signal_internal_error(task, uuid, args, kwargs, request, exc):
    """Send the ``task_internal_error`` signal for an error outside the task body.

    The exception info is taken from the exception currently being
    handled and converted to pickleable forms before sending.
    """
    try:
        tb = sys.exc_info()[2]
        einfo = ExceptionInfo()
        einfo.exception = get_pickleable_exception(einfo.exception)
        einfo.type = get_pickleable_etype(einfo.type)
        payload = {
            'sender': task,
            'task_id': uuid,
            'args': args,
            'kwargs': kwargs,
            'request': request,
            'exception': exc,
            'traceback': tb,
            'einfo': einfo,
        }
        signals.task_internal_error.send(**payload)
    finally:
        # Drop the local traceback reference.
        del tb
def trace_task_ret(name, uuid, request, body, content_type,
                   content_encoding, loads=loads_message, app=None,
                   **extra_request):
    """Decode a task message, trace the task and return a compact result.

    Returns ``(1, R, T)`` when the trace produced an info object (an error
    state), otherwise ``(0, Rstr, T)``.
    """
    if not app:
        app = current_app._get_current_object()

    if content_type:
        # Serialized body: decode it, restricted to the accepted types.
        accept = prepare_accept_content(app.conf.accept_content)
        args, kwargs, embed = loads(
            body, content_type, content_encoding, accept=accept,
        )
    else:
        # Body is already the (args, kwargs, embed) triple.
        args, kwargs, embed = body

    hostname = gethostname()
    base_fields = {
        'args': args, 'kwargs': kwargs,
        'hostname': hostname, 'is_eager': False,
    }
    request.update(base_fields, **embed or {})

    R, I, T, Rstr = trace_task(app.tasks[name],
                               uuid, args, kwargs, request, app=app)
    if I:
        return (1, R, T)
    return (0, Rstr, T)
def fast_trace_task(task, uuid, request, body, content_type,
                    content_encoding, loads=loads_message, _loc=None,
                    hostname=None, **_):
    """Trace a task using the pre-resolved ``(tasks, accept, hostname)`` triple.

    The triple comes from ``_loc`` or, when that is falsy, from the
    module-level ``_localized`` list.  The ``hostname`` argument is
    overwritten by the value from that triple.
    """
    if not _loc:
        _loc = _localized
    tasks, accept, hostname = _loc

    if content_type:
        args, kwargs, embed = loads(
            body, content_type, content_encoding, accept=accept,
        )
    else:
        # Body is already the (args, kwargs, embed) triple.
        args, kwargs, embed = body

    base_fields = {
        'args': args, 'kwargs': kwargs,
        'hostname': hostname, 'is_eager': False,
    }
    request.update(base_fields, **embed or {})

    R, I, T, Rstr = tasks[task].__trace__(
        uuid, args, kwargs, request,
    )
    if I:
        return (1, R, T)
    return (0, Rstr, T)
def report_internal_error(task, exc):
    """Warn about an exception raised outside the task body.

    Returns an internal ``ExceptionInfo`` built from the exception being
    handled, with its value replaced by the backend-prepared form of *exc*.
    """
    exc_type, _, trace = sys.exc_info()
    try:
        prepared = task.backend.prepare_exception(exc, 'pickle')
        exc_info = ExceptionInfo((exc_type, prepared, trace), internal=True)
        message = 'Exception raised outside body: {!r}:\n{}'.format(
            exc, exc_info.traceback)
        warn(RuntimeWarning(message))
        return exc_info
    finally:
        # Drop the local traceback reference.
        del trace
def setup_worker_optimizations(app, hostname=None):
    """Setup worker related optimizations."""
    if not hostname:
        hostname = gethostname()

    # make sure custom Task.__call__ methods that calls super
    # won't mess up the request/task stack.
    _install_stack_protection()

    # all new threads start without a current app, so if an app is not
    # passed on to the thread it will fall back to the "default app",
    # which then could be the wrong app.  So for the worker
    # we set this to always return our app.  This is a hack,
    # and means that only a single app can be used for workers
    # running in the same process.
    app.set_current()
    app.set_default()

    # evaluate all task classes by finalizing the app.
    app.finalize()

    # set fast shortcut to task registry (consumed by fast_trace_task).
    accept = prepare_accept_content(app.conf.accept_content)
    _localized[:] = [app._tasks, accept, hostname]

    app.use_fast_trace_task = True
def reset_worker_optimizations(app=current_app):
    """Undo the optimizations installed by ``setup_worker_optimizations``."""
    # Remove the "already patched" marker, if present.
    try:
        del BaseTask._stackprotected
    except AttributeError:
        pass

    # Restore the original ``BaseTask.__call__`` if one was saved.
    try:
        original_call = _patched.pop('BaseTask.__call__')
    except KeyError:
        pass
    else:
        BaseTask.__call__ = original_call

    app.use_fast_trace_task = False
def _install_stack_protection():
    """Patch ``BaseTask.__call__`` once so custom ``__call__`` + super is safe."""
    # Patches BaseTask.__call__ in the worker to handle the edge case
    # where people override it and also call super.
    #
    # - The worker optimizes away BaseTask.__call__ and instead
    #   calls task.run directly.
    # - so with the addition of current_task and the request stack
    #   BaseTask.__call__ now pushes to those stacks so that
    #   they work when tasks are called directly.
    #
    # The worker only optimizes away __call__ in the case
    # where it hasn't been overridden, so the request/task stack
    # will blow if a custom task class defines __call__ and also
    # calls super().
    if getattr(BaseTask, '_stackprotected', False):
        # Already patched.
        return

    # Remember the original so reset_worker_optimizations can restore it.
    _patched['BaseTask.__call__'] = orig = BaseTask.__call__

    def __protected_call__(self, *args, **kwargs):
        stack = self.request_stack
        req = stack.top
        first_worker_call = (
            req and not req._protected
            and len(stack) == 1 and not req.called_directly
        )
        if first_worker_call:
            # Mark the request and run the body directly.
            req._protected = 1
            return self.run(*args, **kwargs)
        return orig(self, *args, **kwargs)

    BaseTask.__call__ = __protected_call__
    BaseTask._stackprotected = True

View File

@@ -0,0 +1,415 @@
"""App utilities: Compat settings, bug-report tool, pickling apps."""
import os
import platform as _platform
import re
from collections import namedtuple
from collections.abc import Mapping
from copy import deepcopy
from types import ModuleType
from kombu.utils.url import maybe_sanitize_url
from celery.exceptions import ImproperlyConfigured
from celery.platforms import pyimplementation
from celery.utils.collections import ConfigurationView
from celery.utils.imports import import_from_cwd, qualname, symbol_by_name
from celery.utils.text import pretty
from .defaults import _OLD_DEFAULTS, _OLD_SETTING_KEYS, _TO_NEW_KEY, _TO_OLD_KEY, DEFAULTS, SETTING_KEYS, find
# Public names exported by this module.
__all__ = (
    'Settings', 'appstr', 'bugreport',
    'filter_hidden_settings', 'find_app',
)

#: Format used to generate bug-report information.
#: The placeholders are filled in by :func:`bugreport`.
BUGREPORT_INFO = """
software -> celery:{celery_v} kombu:{kombu_v} py:{py_v}
billiard:{billiard_v} {driver_v}
platform -> system:{system} arch:{arch}
kernel version:{kernel_version} imp:{py_i}
loader -> {loader}
settings -> transport:{transport} results:{results}
{human_settings}
"""

#: Case-insensitive pattern; setting keys matching it are masked by
#: :func:`filter_hidden_settings`.
HIDDEN_SETTINGS = re.compile(
    'API|TOKEN|KEY|SECRET|PASS|PROFANITIES_LIST|SIGNATURE|DATABASE',
    re.IGNORECASE,
)

#: Error template used when old-style keys are mixed into new-style settings.
E_MIX_OLD_INTO_NEW = """
Cannot mix new and old setting keys, please rename the
following settings to the new format:
{renames}
"""

#: Error template used when new-style keys are mixed into old-style settings.
E_MIX_NEW_INTO_OLD = """
Cannot mix new setting names with old setting names, please
rename the following settings to use the old format:
{renames}
Or change all of the settings to use the new format :)
"""

#: One rename suggestion line, substituted into the templates above.
FMT_REPLACE_SETTING = '{replace:<36} -> {with_}'
def appstr(app):
    """Return the identifying string used in ``__repr__`` of app instances.

    Uses ``app.main`` (or ``"__main__"`` when that is falsy) followed by
    the app's ``id()`` in hexadecimal.
    """
    main = app.main or "__main__"
    return f'{main} at {id(app):#x}'
class Settings(ConfigurationView):
    """Celery settings object.

    .. seealso::

        :ref:`configuration` for a full list of configuration keys.

    """

    def __init__(self, *args, deprecated_settings=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Old-style keys found in the user's configuration (may be empty).
        self.deprecated_settings = deprecated_settings

    @property
    def broker_read_url(self):
        """Broker URL for reading: environment, then setting, then ``broker_url``."""
        url = os.environ.get('CELERY_BROKER_READ_URL')
        if not url:
            url = self.get('broker_read_url')
        return url or self.broker_url

    @property
    def broker_write_url(self):
        """Broker URL for writing: environment, then setting, then ``broker_url``."""
        url = os.environ.get('CELERY_BROKER_WRITE_URL')
        if not url:
            url = self.get('broker_write_url')
        return url or self.broker_url

    @property
    def broker_url(self):
        """Broker URL: environment first, then ``broker_url``/``broker_host``."""
        from_env = os.environ.get('CELERY_BROKER_URL')
        if from_env:
            return from_env
        return self.first('broker_url', 'broker_host')

    @property
    def result_backend(self):
        """Result backend: environment first, then the configured setting."""
        from_env = os.environ.get('CELERY_RESULT_BACKEND')
        if from_env:
            return from_env
        return self.first('result_backend', 'CELERY_RESULT_BACKEND')

    @property
    def task_default_exchange(self):
        """Default exchange, falling back to the default queue setting."""
        candidates = ('task_default_exchange', 'task_default_queue')
        return self.first(*candidates)

    @property
    def task_default_routing_key(self):
        """Default routing key, falling back to the default queue setting."""
        candidates = ('task_default_routing_key', 'task_default_queue')
        return self.first(*candidates)

    @property
    def timezone(self):
        # this way we also support django's time zone.
        return self.first('timezone', 'TIME_ZONE')

    def without_defaults(self):
        """Return the current configuration, but without defaults."""
        # the last stash is the default settings, so just skip that
        user_maps = self.maps[:-1]
        return Settings({}, user_maps)

    def value_set_for(self, key):
        """Return whether *key* is set outside of the defaults."""
        explicit = self.without_defaults()
        return key in explicit

    def find_option(self, name, namespace=''):
        """Search for option by name.

        Delegates to :func:`find` from the defaults module.

        Arguments:
            name (str): Name of option, cannot be partial.
            namespace (str): Preferred name-space (empty string by default).

        Returns:
            Tuple: of ``(namespace, key, type)``.
        """
        return find(name, namespace)

    def find_value_for_key(self, name, namespace='celery'):
        """Shortcut to ``get_by_parts(*find_option(name)[:-1])``."""
        path = self.find_option(name, namespace)[:-1]
        return self.get_by_parts(*path)

    def get_by_parts(self, *parts):
        """Return the current value for setting specified as a path.

        Empty parts are dropped and the rest joined with ``_`` to form
        the key.

        Example:
            >>> from proj.celery import app
            >>> app.conf.get_by_parts('worker', 'disable_rate_limits')
            False
        """
        key = '_'.join([part for part in parts if part])
        return self[key]

    def finalize(self):
        """Force the configuration to be read, then return ``self``."""
        # See PendingConfiguration in celery/app/base.py
        # first access will read actual configuration.
        try:
            self['__bogus__']
        except KeyError:
            pass
        return self

    def table(self, with_defaults=False, censored=True):
        """Return the settings as a dict, optionally masking sensitive values.

        Keys starting with ``_`` and keys that collide with ``dict``
        member names are left out.
        """
        filt = filter_hidden_settings if censored else lambda v: v
        dict_members = dir(dict)
        self.finalize()
        settings = self if with_defaults else self.without_defaults()
        visible = {}
        for key, value in settings.items():
            if key.startswith('_') or key in dict_members:
                continue
            visible[key] = value
        return filt(visible)

    def humanize(self, with_defaults=False, censored=True):
        """Return a human readable text showing configuration changes."""
        rows = self.table(with_defaults, censored)
        lines = [
            f'{key}: {pretty(value, width=50)}'
            for key, value in rows.items()
        ]
        return '\n'.join(lines)

    def maybe_warn_deprecated_settings(self):
        """Warn about each deprecated setting; return whether any were found."""
        # TODO: Remove this method in Celery 6.0
        if not self.deprecated_settings:
            return False

        from celery.app.defaults import _TO_NEW_KEY
        from celery.utils import deprecated
        for setting in self.deprecated_settings:
            deprecated.warn(description=f'The {setting!r} setting',
                            removal='6.0.0',
                            alternative=f'Use the {_TO_NEW_KEY[setting]} instead')
        return True
def _new_key_to_old(key, convert=_TO_OLD_KEY.get):
    """Map a new-style setting key to its old name (or return it unchanged)."""
    old_key = convert(key, key)
    return old_key
def _old_key_to_new(key, convert=_TO_NEW_KEY.get):
    """Map an old-style setting key to its new name (or return it unchanged)."""
    new_key = convert(key, key)
    return new_key
# Bundle describing one settings naming scheme: its defaults, the key
# conversion table, the key conversion function and the error template
# raised when keys of the other scheme are mixed in.
_settings_info_t = namedtuple('settings_info_t', (
    'defaults', 'convert', 'key_t', 'mix_error',
))

# New-style (lowercase) settings.
_settings_info = _settings_info_t(
    DEFAULTS, _TO_NEW_KEY, _old_key_to_new, E_MIX_OLD_INTO_NEW,
)
# Old-style settings.
_old_settings_info = _settings_info_t(
    _OLD_DEFAULTS, _TO_OLD_KEY, _new_key_to_old, E_MIX_NEW_INTO_OLD,
)
def detect_settings(conf, preconf=None, ignore_keys=None, prefix=None,
                    all_keys=None, old_keys=None):
    """Detect whether old or new setting names are in use and build Settings.

    The naming scheme is chosen by majority among the provided keys (new
    wins ties and is always used when *prefix* is given).

    Raises:
        ImproperlyConfigured: if keys of the other scheme are mixed in
            without their counterpart in the chosen scheme also being set.
    """
    preconf = preconf or {}
    ignore_keys = ignore_keys or set()
    all_keys = all_keys or SETTING_KEYS
    old_keys = old_keys or _OLD_SETTING_KEYS

    source = conf
    if conf is None:
        source, conf = preconf, {}
    have = set(source.keys()) - ignore_keys
    is_in_new = have.intersection(all_keys)
    is_in_old = have.intersection(old_keys)

    # Old format is used only when old keys exist and either no new keys
    # are present or the old keys are a strict majority.
    prefer_old = bool(is_in_old) and (
        not is_in_new or len(is_in_old) > len(is_in_new)
    )
    if prefer_old:
        info, left = _old_settings_info, is_in_new
    else:
        # new names, a tie, or no settings at all: use new format.
        info, left = _settings_info, is_in_old

    if prefix:
        # always use new format if prefix is used.
        info, left = _settings_info, set()

    # only raise error for keys that the user didn't provide two keys
    # for (e.g., both ``result_expires`` and ``CELERY_TASK_RESULT_EXPIRES``).
    really_left = {key for key in left if info.convert[key] not in have}
    if really_left:
        # user is mixing old/new, or new/old settings, give renaming
        # suggestions.
        renames = '\n'.join(
            FMT_REPLACE_SETTING.format(replace=key, with_=info.convert[key])
            for key in sorted(really_left)
        )
        raise ImproperlyConfigured(info.mix_error.format(renames=renames))

    # Normalize preconf keys to the chosen scheme and layer them over a
    # private copy of the defaults.
    preconf = {info.convert.get(k, k): v for k, v in preconf.items()}
    defaults = dict(deepcopy(info.defaults), **preconf)
    key_transforms = (_old_key_to_new, _new_key_to_old)
    return Settings(
        preconf, [conf, defaults],
        key_transforms,
        deprecated_settings=is_in_old,
        prefix=prefix,
    )
class AppPickler:
    """Old application pickler/unpickler (< 3.1)."""

    def __call__(self, cls, *args):
        """Rebuild an app of type *cls* from the pickled positional *args*."""
        options = self.build_kwargs(*args)
        instance = self.construct(cls, **options)
        self.prepare(instance, **options)
        return instance

    def prepare(self, app, **kwargs):
        """Apply the pickled configuration changes to the new app."""
        changes = kwargs['changes']
        app.conf.update(changes)

    def build_kwargs(self, *args):
        """Turn the pickled positional arguments into constructor kwargs."""
        return self.build_standard_kwargs(*args)

    def build_standard_kwargs(self, main, changes, loader, backend, amqp,
                              events, log, control, accept_magic_kwargs,
                              config_source=None):
        """Map the standard argument tuple to keyword arguments.

        ``accept_magic_kwargs`` is accepted but not included in the result.
        """
        return dict(
            main=main, loader=loader, backend=backend,
            amqp=amqp, changes=changes, events=events,
            log=log, control=control, set_as_current=False,
            config_source=config_source,
        )

    def construct(self, cls, **kwargs):
        """Instantiate *cls* with the given keyword arguments."""
        return cls(**kwargs)
def _unpickle_app(cls, pickler, *args):
"""Rebuild app for versions 2.5+."""
return pickler()(cls, *args)
def _unpickle_app_v2(cls, kwargs):
"""Rebuild app for versions 3.1+."""
kwargs['set_as_current'] = False
return cls(**kwargs)
def filter_hidden_settings(conf):
    """Return a copy of *conf* with sensitive values masked.

    Nested mappings are filtered recursively.  For string keys: keys
    matching ``HIDDEN_SETTINGS`` are replaced by a mask, ``broker_url``
    keys are rendered through ``Connection.as_uri`` and ``backend`` keys
    through ``maybe_sanitize_url``.
    """
    def maybe_censor(key, value, mask='*' * 8):
        if isinstance(value, Mapping):
            return filter_hidden_settings(value)
        if not isinstance(key, str):
            return value
        if HIDDEN_SETTINGS.search(key):
            return mask
        lowered = key.lower()
        if 'broker_url' in lowered:
            from kombu import Connection
            return Connection(value).as_uri(mask=mask)
        if 'backend' in lowered:
            return maybe_sanitize_url(value, mask=mask)
        return value

    censored = {}
    for key, value in conf.items():
        censored[key] = maybe_censor(key, value)
    return censored
def bugreport(app):
    """Return a string containing information useful in bug-reports."""
    import billiard
    import kombu

    import celery

    # Connection details are best-effort: any failure leaves them blank.
    try:
        conn = app.connection()
        driver_v = '{}:{}'.format(conn.transport.driver_name,
                                  conn.transport.driver_version())
        transport = conn.transport_cls
    except Exception:  # pylint: disable=broad-except
        transport = driver_v = ''

    fields = {
        'system': _platform.system(),
        'arch': ', '.join(x for x in _platform.architecture() if x),
        'kernel_version': _platform.release(),
        'py_i': pyimplementation(),
        'celery_v': celery.VERSION_BANNER,
        'kombu_v': kombu.__version__,
        'billiard_v': billiard.__version__,
        'py_v': _platform.python_version(),
        'driver_v': driver_v,
        'transport': transport,
        'results': maybe_sanitize_url(app.conf.result_backend or 'disabled'),
        'human_settings': app.conf.humanize(),
        'loader': qualname(app.loader.__class__),
    }
    return BUGREPORT_INFO.format(**fields)
def find_app(app, symbol_by_name=symbol_by_name, imp=import_from_cwd):
    """Find app by name.

    *app* is first resolved with ``symbol_by_name``; if that raises
    :exc:`AttributeError` it is imported as a module with ``imp``.

    When the result is a module and *app* contains no ``:``, the app
    instance is searched for in this order:

    1. the module's ``app`` attribute,
    2. the module's ``celery`` attribute,
    3. if the module is a package (has ``__path__``), the ``celery``
       submodule, via a recursive call,
    4. the first module-level value that is a ``Celery`` instance.

    Attributes that are themselves modules are skipped in steps 1 and 2.
    If nothing is found the :exc:`AttributeError` from step 2 is re-raised.
    Otherwise the resolved symbol is returned as is.
    """
    from .base import Celery

    try:
        sym = symbol_by_name(app, imp=imp)
    except AttributeError:
        # last part was not an attribute, but a module
        sym = imp(app)
    if isinstance(sym, ModuleType) and ':' not in app:
        try:
            found = sym.app
            if isinstance(found, ModuleType):
                # Treat a submodule named ``app`` as "attribute missing".
                raise AttributeError()
        except AttributeError:
            try:
                found = sym.celery
                if isinstance(found, ModuleType):
                    raise AttributeError(
                        "attribute 'celery' is the celery module not the instance of celery")
            except AttributeError:
                if getattr(sym, '__path__', None):
                    # Package: try its ``celery`` submodule.
                    try:
                        return find_app(
                            f'{app}.celery',
                            symbol_by_name=symbol_by_name, imp=imp,
                        )
                    except ImportError:
                        pass
                # Last resort: any Celery instance defined in the module.
                for suspect in vars(sym).values():
                    if isinstance(suspect, Celery):
                        return suspect
                # Nothing found: re-raise the AttributeError handled here.
                raise
            else:
                return found
        else:
            return found
    return sym

View File

@@ -0,0 +1,160 @@
"""Beat command-line program.
This module is the 'program-version' of :mod:`celery.beat`.
It does everything necessary to run that module
as an actual application, like installing signal handlers
and so on.
"""
from __future__ import annotations
import numbers
import socket
import sys
from datetime import datetime
from signal import Signals
from types import FrameType
from typing import Any
from celery import VERSION_BANNER, Celery, beat, platforms
from celery.utils.imports import qualname
from celery.utils.log import LOG_LEVELS, get_logger
from celery.utils.time import humanize_seconds
# Public names exported by this module.
__all__ = ('Beat',)

# Banner template; the placeholders are filled in by ``Beat.startup_info``.
STARTUP_INFO_FMT = """
LocalTime -> {timestamp}
Configuration ->
. broker -> {conninfo}
. loader -> {loader}
. scheduler -> {scheduler}
{scheduler_info}
. logfile -> {logfile}@%{loglevel}
. maxinterval -> {hmax_interval} ({max_interval}s)
""".strip()

# Logger used by this module, named 'celery.beat'.
logger = get_logger('celery.beat')
class Beat:
    """Beat as a service."""

    # Scheduler service class instantiated by ``start_scheduler``.
    Service = beat.Service
    # Fallback app used when none is passed to ``__init__``.
    app: Celery = None

    def __init__(self, max_interval: int | None = None, app: Celery | None = None,
                 socket_timeout: int = 30, pidfile: str | None = None, no_color: bool | None = None,
                 loglevel: str = 'WARN', logfile: str | None = None, schedule: str | None = None,
                 scheduler: str | None = None,
                 scheduler_cls: str | None = None,  # XXX use scheduler
                 redirect_stdouts: bool | None = None,
                 redirect_stdouts_level: str | None = None,
                 quiet: bool = False, **kwargs: Any) -> None:
        """Store the options, resolving unset ones through ``app.either``."""
        self.app = app = app or self.app
        either = self.app.either
        self.loglevel = loglevel
        self.logfile = logfile
        self.schedule = either('beat_schedule_filename', schedule)
        self.scheduler_cls = either(
            'beat_scheduler', scheduler, scheduler_cls)
        self.redirect_stdouts = either(
            'worker_redirect_stdouts', redirect_stdouts)
        self.redirect_stdouts_level = either(
            'worker_redirect_stdouts_level', redirect_stdouts_level)
        self.quiet = quiet

        self.max_interval = max_interval
        self.socket_timeout = socket_timeout
        self.no_color = no_color
        # ``None`` is passed through unchanged when no_color is unset.
        color_enabled = None if no_color is None else not no_color
        self.colored = app.log.colored(self.logfile, enabled=color_enabled)
        self.pidfile = pidfile

        # Accept level names as well as numeric levels.
        if not isinstance(self.loglevel, numbers.Integral):
            self.loglevel = LOG_LEVELS[self.loglevel.upper()]

    def run(self) -> None:
        """Print the start message (unless quiet) and start the scheduler."""
        if not self.quiet:
            greeting = self.colored.cyan(
                f'celery beat v{VERSION_BANNER} is starting.')
            print(str(greeting))
        self.init_loader()
        self.set_process_title()
        self.start_scheduler()

    def setup_logging(self, colorize: bool | None = None) -> None:
        """Configure app logging; *colorize* defaults from ``no_color``."""
        if colorize is None and self.no_color is not None:
            colorize = not self.no_color
        self.app.log.setup(
            self.loglevel, self.logfile,
            self.redirect_stdouts, self.redirect_stdouts_level,
            colorize=colorize,
        )

    def start_scheduler(self) -> None:
        """Create the service, set up logging and run it.

        Exceptions from the service are logged at critical level and
        re-raised.
        """
        if self.pidfile:
            platforms.create_pidlock(self.pidfile)
        service = self.Service(
            app=self.app,
            max_interval=self.max_interval,
            scheduler_cls=self.scheduler_cls,
            schedule_filename=self.schedule,
        )

        if not self.quiet:
            print(self.banner(service))

        self.setup_logging()
        if self.socket_timeout:
            logger.debug('Setting default socket timeout to %r',
                         self.socket_timeout)
            socket.setdefaulttimeout(self.socket_timeout)
        try:
            self.install_sync_handler(service)
            service.start()
        except Exception as exc:
            logger.critical('beat raised exception %s: %r',
                            exc.__class__, exc,
                            exc_info=True)
            raise

    def banner(self, service: beat.Service) -> str:
        """Return the colored startup banner for *service*."""
        c = self.colored
        parts = [
            '__ ', c.magenta('-'),
            c.blue(' ... __ '), c.magenta('-'),
            c.blue(' _\n'),
            c.reset(self.startup_info(service)),
        ]
        return str(c.blue(*parts))

    def init_loader(self) -> None:
        """Initialize the loader and finalize the app."""
        # Run the worker init handler.
        # (Usually imports task modules and such.)
        self.app.loader.init_worker()
        self.app.finalize()

    def startup_info(self, service: beat.Service) -> str:
        """Return the filled-in ``STARTUP_INFO_FMT`` text for *service*."""
        scheduler = service.get_scheduler(lazy=True)
        details = {
            'conninfo': self.app.connection().as_uri(),
            'timestamp': datetime.now().replace(microsecond=0),
            'logfile': self.logfile or '[stderr]',
            'loglevel': LOG_LEVELS[self.loglevel],
            'loader': qualname(self.app.loader),
            'scheduler': qualname(scheduler),
            'scheduler_info': scheduler.info,
            'hmax_interval': humanize_seconds(scheduler.max_interval),
            'max_interval': scheduler.max_interval,
        }
        return STARTUP_INFO_FMT.format(**details)

    def set_process_title(self) -> None:
        """Set the process title to 'celery beat' plus the CLI arguments."""
        # Skip one extra argv entry when argv[0] contains 'manage'.
        arg_start = 2 if 'manage' in sys.argv[0] else 1
        cli_args = ' '.join(sys.argv[arg_start:])
        platforms.set_process_title('celery beat', info=cli_args)

    def install_sync_handler(self, service: beat.Service) -> None:
        """Install a `SIGTERM` + `SIGINT` handler saving the schedule."""
        def _sync(signum: Signals, frame: FrameType) -> None:
            # Sync the service before exiting.
            service.sync()
            raise SystemExit()
        platforms.signals.update(SIGTERM=_sync, SIGINT=_sync)

View File

@@ -0,0 +1,506 @@
"""Start/stop/manage workers."""
import errno
import os
import shlex
import signal
import sys
from collections import OrderedDict, UserList, defaultdict
from functools import partial
from subprocess import Popen
from time import sleep
from kombu.utils.encoding import from_utf8
from kombu.utils.objects import cached_property
from celery.platforms import IS_WINDOWS, Pidfile, signal_name
from celery.utils.nodenames import gethostname, host_format, node_format, nodesplit
from celery.utils.saferepr import saferepr
__all__ = ('Cluster', 'Node')
CELERY_EXE = 'celery'
def celery_exe(*args):
return ' '.join((CELERY_EXE,) + args)
def build_nodename(name, prefix, suffix):
    """Return ``(name, nodename, hostname)`` for a node.

    A *name* containing ``@`` is treated as a full node name and split;
    otherwise the node name is built from ``prefix + name`` and *suffix*
    as the host part.
    """
    if '@' not in name:
        shortname = f'{prefix}{name}'
        nodename = host_format(
            f'{shortname}@{suffix}',
        )
        return name, nodename, suffix

    nodename = host_format(name)
    shortname, hostname = nodesplit(nodename)
    return shortname, nodename, hostname
def build_expander(nodename, shortname, hostname):
    """Return ``node_format`` pre-bound with this node's substitutions.

    ``i`` and ``I`` are bound to the literal strings ``'%i'`` and ``'%I'``.
    """
    substitutions = {
        'name': nodename,
        'N': shortname,
        'd': hostname,
        'h': nodename,
        'i': '%i',
        'I': '%I',
    }
    return partial(node_format, **substitutions)
def format_opt(opt, value):
    """Render one command-line option.

    Falsy values give the bare option; long options (``--x``) use
    ``opt=value`` and everything else ``opt value``.
    """
    if not value:
        return opt
    separator = '=' if opt.startswith('--') else ' '
    return f'{opt}{separator}{value}'
def _kwargs_to_command_line(kwargs):
return {
('--{}'.format(k.replace('_', '-'))
if len(k) > 1 else f'-{k}'): f'{v}'
for k, v in kwargs.items()
}
class NamespacedOptionParser:
    """Parse an argument list into options, namespaced options and values.

    After :meth:`parse`:

    * ``options`` maps option flags (``-x`` / ``--long``) to their values,
    * ``namespaces`` maps a namespace to the options given as
      ``-x:ns`` / ``--long:ns``,
    * ``values`` holds the positional arguments,
    * ``passthrough`` is everything from a literal ``--`` onwards,
      joined by spaces (including the ``--`` itself).
    """

    def __init__(self, args):
        self.args = args
        self.options = OrderedDict()
        self.values = []
        self.passthrough = ''
        self.namespaces = defaultdict(OrderedDict)

    def parse(self):
        """Parse ``self.args`` and fill in the result attributes."""
        # Empty arguments are dropped up front.
        rargs = [arg for arg in self.args if arg]
        pos = 0
        while pos < len(rargs):
            arg = rargs[pos]
            if arg == '--':
                self.passthrough = ' '.join(rargs[pos:])
                break
            elif arg.startswith('--'):
                self.process_long_opt(arg[2:])
            elif arg.startswith('-') and len(arg) > 1:
                # A short option takes the next argument as its value
                # unless that argument itself starts with a dash.
                value = None
                if len(rargs) > pos + 1 and rargs[pos + 1][0] != '-':
                    value = rargs[pos + 1]
                    pos += 1
                self.process_short_opt(arg[1:], value)
            else:
                # Positional value.  A bare '-' lands here too instead of
                # raising IndexError when probing its second character.
                self.values.append(arg)
            pos += 1

    def process_long_opt(self, arg, value=None):
        """Record a long option given without its leading ``--``.

        ``name=value`` is split on the first ``=``.
        """
        if '=' in arg:
            arg, value = arg.split('=', 1)
        self.add_option(arg, value, short=False)

    def process_short_opt(self, arg, value=None):
        """Record a short option given without its leading ``-``."""
        self.add_option(arg, value, short=True)

    def optmerge(self, ns, defaults=None):
        """Return *defaults* (or the global options) overlaid with namespace *ns*."""
        if defaults is None:
            defaults = self.options
        return OrderedDict(defaults, **self.namespaces[ns])

    def add_option(self, name, value, short=False, ns=None):
        """Store an option, routing ``name:ns`` into that namespace."""
        prefix = '-' if short else '--'
        dest = self.options
        if ':' in name:
            name, ns = name.split(':')
            dest = self.namespaces[ns]
        dest[prefix + name] = value
class Node:
    """Represents a node in a cluster."""

    def __init__(self, name,
                 cmd=None, append=None, options=None, extra_args=None):
        self.name = name
        self.cmd = cmd or f"-m {celery_exe('worker', '--detach')}"
        self.append = append
        self.extra_args = extra_args or ''
        self.options = self._annotate_with_default_opts(
            options or OrderedDict())
        self.expander = self._prepare_expander()
        self.argv = self._prepare_argv()
        self._pid = None

    def _annotate_with_default_opts(self, options):
        """Force ``-n`` to the node name and fill in pidfile/logfile/executable defaults."""
        options['-n'] = self.name
        self._setdefaultopt(options, ['--pidfile', '-p'], '/var/run/celery/%n.pid')
        self._setdefaultopt(options, ['--logfile', '-f'], '/var/log/celery/%n%I.log')
        self._setdefaultopt(options, ['--executable'], sys.executable)
        return options

    def _setdefaultopt(self, d, alt, value):
        """Return the option value, installing a default under ``alt[0]`` if needed.

        If any alias in ``alt[1:]`` is already present in ``d`` its value is
        returned untouched.  Otherwise ``alt[0]`` is set (unless present) to
        the normalized ``value`` and the directory part of the resulting
        path is created when it does not exist.
        """
        for opt in alt[1:]:
            try:
                return d[opt]
            except KeyError:
                pass
        value = d.setdefault(alt[0], os.path.normpath(value))
        dir_path = os.path.dirname(value)
        if dir_path and not os.path.exists(dir_path):
            # exist_ok: another node/process may create the directory
            # between the existence check above and this call.
            os.makedirs(dir_path, exist_ok=True)
        return value

    def _prepare_expander(self):
        """Build the format expander from the ``short@host`` node name."""
        shortname, hostname = self.name.split('@', 1)
        return build_expander(
            self.name, shortname, hostname)

    def _prepare_argv(self):
        """Build the argument tuple used to start this node.

        The options listed below are inserted directly after the
        ``celery`` word of the command; all remaining options follow the
        command, then ``extra_args`` and finally the expanded ``append``.
        """
        cmd = self.expander(self.cmd).split(' ')
        i = cmd.index('celery') + 1

        options = self.options.copy()
        for opt, value in self.options.items():
            if opt in (
                '-A', '--app',
                '-b', '--broker',
                '--result-backend',
                '--loader',
                '--config',
                '--workdir',
                '-C', '--no-color',
                '-q', '--quiet',
            ):
                cmd.insert(i, format_opt(opt, self.expander(value)))

                options.pop(opt)

        cmd = [' '.join(cmd)]
        argv = tuple(
            cmd +
            [format_opt(opt, self.expander(value))
                for opt, value in options.items()] +
            [self.extra_args]
        )
        if self.append:
            argv += (self.expander(self.append),)
        return argv

    def alive(self):
        """Return the result of sending signal 0 to the node's process."""
        return self.send(0)

    def send(self, sig, on_error=None):
        """Send ``sig`` to the node's pid.

        Returns True when the signal was delivered and False when the
        process does not exist (``ESRCH``).  When no pid is known the
        method returns None.  ``on_error`` is called with the node in both
        failure cases; other ``OSError`` values propagate.
        """
        pid = self.pid
        if pid:
            try:
                os.kill(pid, sig)
            except OSError as exc:
                if exc.errno != errno.ESRCH:
                    raise
                maybe_call(on_error, self)
                return False
            return True
        maybe_call(on_error, self)

    def start(self, env=None, **kwargs):
        """Run the node's command with its executable and wait for it to exit."""
        return self._waitexec(
            self.argv, path=self.executable, env=env, **kwargs)

    def _waitexec(self, argv, path=sys.executable, env=None,
                  on_spawn=None, on_signalled=None, on_failure=None):
        """Spawn ``path`` with ``argv``, wait for it and report the exit status."""
        argstr = self.prepare_argv(argv, path)
        maybe_call(on_spawn, self, argstr=' '.join(argstr), env=env)
        pipe = Popen(argstr, env=env)
        return self.handle_process_exit(
            pipe.wait(),
            on_signalled=on_signalled,
            on_failure=on_failure,
        )

    def handle_process_exit(self, retcode, on_signalled=None, on_failure=None):
        """Dispatch on the child's return code.

        A negative code calls ``on_signalled`` with the negated value, which
        is also returned; a positive code calls ``on_failure``.  Zero and
        positive codes are returned unchanged.
        """
        if retcode < 0:
            maybe_call(on_signalled, self, -retcode)
            return -retcode
        elif retcode > 0:
            maybe_call(on_failure, self, retcode)
        return retcode

    def prepare_argv(self, argv, path):
        """Join ``path`` and ``argv`` and split the result shell-style."""
        args = ' '.join([path] + list(argv))
        return shlex.split(from_utf8(args), posix=not IS_WINDOWS)

    def getopt(self, *alt):
        """Return the value of the first alias in ``alt`` found in the options.

        Raises:
            KeyError: with ``alt[0]`` when none of the aliases is set.
        """
        for opt in alt:
            try:
                return self.options[opt]
            except KeyError:
                pass
        raise KeyError(alt[0])

    def __repr__(self):
        return f'<{type(self).__name__}: {self.name}>'

    @cached_property
    def pidfile(self):
        return self.expander(self.getopt('--pidfile', '-p'))

    @cached_property
    def logfile(self):
        return self.expander(self.getopt('--logfile', '-f'))

    @property
    def pid(self):
        """The explicitly assigned pid, else the pid read from the pidfile.

        Evaluates to None when reading the pidfile raises ``ValueError``.
        """
        if self._pid is not None:
            return self._pid
        try:
            return Pidfile(self.pidfile).read_pid()
        except ValueError:
            pass

    @pid.setter
    def pid(self, value):
        self._pid = value

    @cached_property
    def executable(self):
        return self.options['--executable']

    @cached_property
    def argv_with_executable(self):
        return (self.executable,) + self.argv

    @classmethod
    def from_kwargs(cls, name, **kwargs):
        """Create a node whose options are derived from keyword arguments."""
        return cls(name, options=_kwargs_to_command_line(kwargs))
def maybe_call(fun, *args, **kwargs):
    """Call ``fun(*args, **kwargs)`` unless ``fun`` is None; the result is discarded."""
    if fun is None:
        return
    fun(*args, **kwargs)
class MultiParser:
    """Turn a parsed :class:`NamespacedOptionParser` into :class:`Node` objects."""

    Node = Node

    def __init__(self, cmd='celery worker',
                 append='', prefix='', suffix='',
                 range_prefix='celery'):
        self.cmd = cmd
        self.append = append
        self.prefix = prefix
        self.suffix = suffix
        self.range_prefix = range_prefix

    def parse(self, p):
        """Return a generator of nodes described by parser ``p``.

        A single positional value is first tried as a node count (``3``
        means nodes ``1``..``3`` named with the range prefix); if it is not
        an integer it is used as a node name like any other value.
        """
        names = p.values
        options = dict(p.options)
        ranges = len(names) == 1

        cmd = options.pop('--cmd', self.cmd)
        append = options.pop('--append', self.append)
        # '-n' is always removed from the options, even when '--hostname'
        # takes precedence over it.
        fallback_host = options.pop('-n', gethostname())
        hostname = options.pop('--hostname', fallback_host)
        prefix = options.pop('--prefix', self.prefix) or ''
        suffix = options.pop('--suffix', self.suffix) or hostname
        if suffix in ('""', "''"):
            # An explicitly quoted empty string means "no suffix".
            suffix = ''
        range_prefix = options.pop('--range-prefix', '') or self.range_prefix

        if ranges:
            try:
                names, prefix = self._get_ranges(names), range_prefix
            except ValueError:
                pass

        self._update_ns_opts(p, names)
        self._update_ns_ranges(p, ranges)
        return (
            self._node_from_options(
                p, name, prefix, suffix, cmd, append, options)
            for name in names
        )

    def _node_from_options(self, p, name, prefix,
                           suffix, cmd, append, options):
        """Create the node for ``name`` with its namespace options merged in."""
        namespace, nodename, _ = build_nodename(name, prefix, suffix)
        if nodename in p.namespaces:
            # Options addressed to the full node name win over the short name.
            namespace = nodename
        merged = p.optmerge(namespace, options)
        return Node(nodename, cmd, append, merged, p.passthrough)

    def _get_ranges(self, names):
        """Expand a node count into the names ``'1'`` .. ``'<count>'``."""
        count = int(names[0])
        return [str(number) for number in range(1, count + 1)]

    def _update_ns_opts(self, p, names):
        # Numbers in args always refers to the index in the list of names.
        # (e.g., `start foo bar baz -c:1` where 1 is foo, 2 is bar, and so on).
        for ns_name, ns_opts in list(p.namespaces.items()):
            if not ns_name.isdigit():
                continue
            ns_index = int(ns_name) - 1
            if ns_index < 0:
                raise KeyError(f'Indexes start at 1 got: {ns_name!r}')
            try:
                p.namespaces[names[ns_index]].update(ns_opts)
            except IndexError:
                raise KeyError(f'No node at index {ns_name!r}')

    def _update_ns_ranges(self, p, ranges):
        """Replace list/range namespaces (``1,2`` or ``1-3``) by one namespace per member."""
        for ns_name, ns_opts in list(p.namespaces.items()):
            is_list = ',' in ns_name
            is_range = ranges and '-' in ns_name
            if not (is_list or is_range):
                continue
            for subns in self._parse_ns_range(ns_name, ranges):
                p.namespaces[subns].update(ns_opts)
            p.namespaces.pop(ns_name)

    def _parse_ns_range(self, ns, ranges=False):
        """Expand a comma separated namespace, and ``a-b`` spans when ``ranges`` is set."""
        expanded = []
        # str.split returns [ns] when there is no comma.
        for space in ns.split(','):
            if ranges and '-' in space:
                start, stop = space.split('-')
                expanded.extend(
                    str(number) for number in range(int(start), int(stop) + 1)
                )
            else:
                expanded.append(space)
        return expanded
class Cluster(UserList):
    """Represent a cluster of workers."""

    def __init__(self, nodes, cmd=None, env=None,
                 on_stopping_preamble=None,
                 on_send_signal=None,
                 on_still_waiting_for=None,
                 on_still_waiting_progress=None,
                 on_still_waiting_end=None,
                 on_node_start=None,
                 on_node_restart=None,
                 on_node_shutdown_ok=None,
                 on_node_status=None,
                 on_node_signal=None,
                 on_node_signal_dead=None,
                 on_node_down=None,
                 on_child_spawn=None,
                 on_child_signalled=None,
                 on_child_failure=None):
        # The list contents live in ``self.nodes`` and are exposed through
        # the ``data`` property below.
        self.nodes = nodes
        self.cmd = cmd or celery_exe('worker')
        self.env = env

        # Optional callbacks; each one is invoked through maybe_call.
        self.on_stopping_preamble = on_stopping_preamble
        self.on_send_signal = on_send_signal
        self.on_still_waiting_for = on_still_waiting_for
        self.on_still_waiting_progress = on_still_waiting_progress
        self.on_still_waiting_end = on_still_waiting_end
        self.on_node_start = on_node_start
        self.on_node_restart = on_node_restart
        self.on_node_shutdown_ok = on_node_shutdown_ok
        self.on_node_status = on_node_status
        self.on_node_signal = on_node_signal
        self.on_node_signal_dead = on_node_signal_dead
        self.on_node_down = on_node_down
        self.on_child_spawn = on_child_spawn
        self.on_child_signalled = on_child_signalled
        self.on_child_failure = on_child_failure

    def start(self):
        """Start every node in order and return the list of return codes."""
        return list(map(self.start_node, self))

    def start_node(self, node):
        """Start one node, reporting through ``on_node_start``/``on_node_status``."""
        maybe_call(self.on_node_start, node)
        status = self._start_node(node)
        maybe_call(self.on_node_status, node, status)
        return status

    def _start_node(self, node):
        child_callbacks = {
            'on_spawn': self.on_child_spawn,
            'on_signalled': self.on_child_signalled,
            'on_failure': self.on_child_failure,
        }
        return node.start(self.env, **child_callbacks)

    def send_all(self, sig):
        """Send ``sig`` to every node that has a pid."""
        for node in self.getpids(on_down=self.on_node_down):
            maybe_call(self.on_node_signal, node, signal_name(sig))
            node.send(sig, self.on_node_signal_dead)

    def kill(self):
        """Send ``SIGKILL`` to every node that has a pid."""
        return self.send_all(signal.SIGKILL)

    def restart(self, sig=signal.SIGTERM):
        """Stop the nodes and start each one again once it is down.

        Returns the list of return codes of the restarted nodes.
        """
        statuses = []

        def restart_on_down(node):
            maybe_call(self.on_node_restart, node)
            status = self._start_node(node)
            maybe_call(self.on_node_status, node, status)
            statuses.append(status)

        self._stop_nodes(retry=2, on_down=restart_on_down, sig=sig)
        return statuses

    def stop(self, retry=None, callback=None, sig=signal.SIGTERM):
        """Signal the nodes to stop; only waits for them when ``retry`` is set."""
        return self._stop_nodes(retry=retry, on_down=callback, sig=sig)

    def stopwait(self, retry=2, callback=None, sig=signal.SIGTERM):
        """Like :meth:`stop`, with a default ``retry`` of 2."""
        return self._stop_nodes(retry=retry, on_down=callback, sig=sig)

    def _stop_nodes(self, retry=None, on_down=None, sig=signal.SIGTERM):
        if on_down is None:
            on_down = self.on_node_down
        running = list(self.getpids(on_down=on_down))
        if not running:
            return
        for node in self.shutdown_nodes(running, sig=sig, retry=retry):
            maybe_call(on_down, node)

    def shutdown_nodes(self, nodes, sig=signal.SIGTERM, retry=None):
        """Signal ``nodes`` and yield each node as it is found to be down.

        Nodes whose signal could not be delivered are yielded immediately.
        When ``retry`` is truthy the remaining nodes are polled with
        ``alive()`` and yielded as they stop, sleeping ``retry`` seconds
        between polling rounds.
        """
        pending = set(nodes)
        maybe_call(self.on_stopping_preamble, nodes)

        undelivered = set()
        for node in pending:
            maybe_call(self.on_send_signal, node, signal_name(sig))
            if not node.send(sig, self.on_node_signal_dead):
                undelivered.add(node)
                yield node
        pending -= undelivered

        if not retry:
            return

        maybe_call(self.on_still_waiting_for, pending)
        checks = 0
        while pending:
            stopped = set()
            for node in pending:
                checks += 1
                maybe_call(self.on_still_waiting_progress, pending)
                if not node.alive():
                    maybe_call(self.on_node_shutdown_ok, node)
                    stopped.add(node)
                    yield node
                    maybe_call(self.on_still_waiting_for, pending)
                    # Leave the scan: the set is modified right below.
                    break
            pending -= stopped
            if pending and not checks % len(pending):
                sleep(float(retry))
        maybe_call(self.on_still_waiting_end)

    def find(self, name):
        """Return the node called ``name``.

        Raises:
            KeyError: when no node has that name.
        """
        for candidate in self:
            if candidate.name == name:
                return candidate
        raise KeyError(name)

    def getpids(self, on_down=None):
        """Yield the nodes that have a pid; call ``on_down`` for the others."""
        for node in self:
            if not node.pid:
                maybe_call(on_down, node)
                continue
            yield node

    def __repr__(self):
        node_names = [n.name for n in self]
        return '<{name}({0}): {1}>'.format(
            len(self), saferepr(node_names),
            name=type(self).__name__,
        )

    @property
    def data(self):
        return self.nodes

View File

@@ -0,0 +1,387 @@
"""Worker command-line program.
This module is the 'program-version' of :mod:`celery.worker`.
It does everything necessary to run that module
as an actual application, like installing signal handlers,
platform tweaks, and so on.
"""
import logging
import os
import platform as _platform
import sys
from datetime import datetime
from functools import partial
from billiard.common import REMAP_SIGTERM
from billiard.process import current_process
from kombu.utils.encoding import safe_str
from celery import VERSION_BANNER, platforms, signals
from celery.app import trace
from celery.loaders.app import AppLoader
from celery.platforms import EX_FAILURE, EX_OK, check_privileges
from celery.utils import static, term
from celery.utils.debug import cry
from celery.utils.imports import qualname
from celery.utils.log import get_logger, in_sighandler, set_in_sighandler
from celery.utils.text import pluralize
from celery.worker import WorkController
__all__ = ('Worker',)
logger = get_logger(__name__)
is_jython = sys.platform.startswith('java')
is_pypy = hasattr(sys, 'pypy_version_info')
ARTLINES = [
' --------------',
'--- ***** -----',
'-- ******* ----',
'- *** --- * ---',
'- ** ----------',
'- ** ----------',
'- ** ----------',
'- ** ----------',
'- *** --- * ---',
'-- ******* ----',
'--- ***** -----',
' --------------',
]
BANNER = """\
{hostname} v{version}
{platform} {timestamp}
[config]
.> app: {app}
.> transport: {conninfo}
.> results: {results}
.> concurrency: {concurrency}
.> task events: {events}
[queues]
{queues}
"""
EXTRA_INFO_FMT = """
[tasks]
{tasks}
"""
def active_thread_count():
    """Return the number of threads whose name does not start with ``'Dummy-'``."""
    import threading

    count = 0
    for thread in threading.enumerate():
        if not thread.name.startswith('Dummy-'):
            count += 1
    return count
def safe_say(msg):
    """Print ``msg`` on ``sys.__stderr__``, preceded by a newline, and flush."""
    text = f'\n{msg}'
    print(text, file=sys.__stderr__, flush=True)
class Worker(WorkController):
    """Worker as a program."""

    def on_before_init(self, quiet=False, **kwargs):
        """Record ``quiet``, set up trace optimizations, announce init and check privileges."""
        self.quiet = quiet
        trace.setup_worker_optimizations(self.app, self.hostname)

        # this signal can be used to set up configuration for
        # workers by name.
        signals.celeryd_init.send(
            sender=self.hostname, instance=self,
            conf=self.app.conf, options=kwargs,
        )
        check_privileges(self.app.conf.accept_content)

    def on_after_init(self, purge=False, no_color=None,
                      redirect_stdouts=None, redirect_stdouts_level=None,
                      **kwargs):
        """Store the program-level options and prepare the colored output helper."""
        either = self.app.either
        self.redirect_stdouts = either(
            'worker_redirect_stdouts', redirect_stdouts)
        self.redirect_stdouts_level = either(
            'worker_redirect_stdouts_level', redirect_stdouts_level)
        super().setup_defaults(**kwargs)
        self.purge = purge
        self.no_color = no_color
        self._isatty = sys.stdout.isatty()
        # None is passed through unchanged when no_color was not given.
        color_enabled = None if no_color is None else not no_color
        self.colored = self.app.log.colored(self.logfile, enabled=color_enabled)

    def on_init_blueprint(self):
        self._custom_logging = self.setup_logging()
        # apply task execution optimizations
        # -- This will finalize the app!
        trace.setup_worker_optimizations(self.app, self.hostname)

    def on_start(self):
        """Run the start-up sequence: signal, purge, banner, signal handlers, stdout redirection."""
        app = self.app
        super().on_start()

        # this signal can be used to, for example, change queues after
        # the -Q option has been applied.
        signals.celeryd_after_setup.send(
            sender=self.hostname, instance=self, conf=app.conf,
        )

        if self.purge:
            self.purge_messages()

        if not self.quiet:
            self.emit_banner()

        self.set_process_status('-active-')
        self.install_platform_tweaks(self)
        if not self._custom_logging and self.redirect_stdouts:
            app.log.redirect_stdouts(self.redirect_stdouts_level)

        # TODO: Remove the following code in Celery 6.0
        # This qualifies as a hack for issue #6366.
        config_source = app._config_source
        # Don't raise the warning when the settings originate from
        # django.conf:settings
        from_django_settings = (
            isinstance(config_source, str)
            and config_source.lower() in ['django.conf:settings']
        )
        if not from_django_settings and app.conf.maybe_warn_deprecated_settings():
            logger.warning(
                "Please run `celery upgrade settings path/to/settings.py` "
                "to avoid these warnings and to allow a smoother upgrade "
                "to Celery 6.0."
            )

    def emit_banner(self):
        # Dump configuration to screen so we have some basic information
        # for when users sends bug reports.
        use_image = term.supports_images()
        if use_image:
            print(term.imgcat(static.logo()))
        header = str(self.colored.cyan(
            ' \n', self.startup_info(artlines=not use_image)))
        details = str(self.colored.reset(self.extra_info() or ''))
        print(safe_str(''.join([header, details])),
              file=sys.__stdout__, flush=True)

    def on_consumer_ready(self, consumer):
        signals.worker_ready.send(sender=consumer)
        logger.info('%s ready.', safe_str(self.hostname))

    def setup_logging(self, colorize=None):
        """Configure logging through the app; returns what ``app.log.setup`` returns."""
        if colorize is None and self.no_color is not None:
            colorize = not self.no_color
        return self.app.log.setup(
            self.loglevel, self.logfile,
            redirect_stdouts=False, colorize=colorize, hostname=self.hostname,
        )

    def purge_messages(self):
        """Purge the queues and print how many messages were erased, if any."""
        with self.app.connection_for_write() as connection:
            count = self.app.control.purge(connection=connection)
            if count:  # pragma: no cover
                print(f"purge: Erased {count} {pluralize(count, 'message')} from the queue.\n", flush=True)

    def tasklist(self, include_builtins=True, sep='\n', int_='celery.'):
        """Return the sorted task names, one formatted entry per task.

        Tasks whose name starts with ``int_`` are left out unless
        ``include_builtins`` is true.
        """
        entries = []
        for task in sorted(self.app.tasks):
            if include_builtins:
                listed = bool(task)
            else:
                listed = not task.startswith(int_)
            if listed:
                entries.append(f' . {task}')
        return sep.join(entries)

    def extra_info(self):
        """Return the task-list section, or None when the log level is unset or above INFO."""
        if self.loglevel is None:
            return None
        if not self.loglevel <= logging.INFO:
            return None
        show_builtins = self.loglevel <= logging.DEBUG
        return EXTRA_INFO_FMT.format(
            tasks=self.tasklist(include_builtins=show_builtins))

    def startup_info(self, artlines=True):
        """Return the start-up banner text, optionally prefixed with the ASCII art."""
        app = self.app
        concurrency = str(self.concurrency)
        appr = '{}:{:#x}'.format(app.main or '__main__', id(app))
        if not isinstance(app.loader, AppLoader):
            loader = qualname(app.loader)
            if loader.startswith('celery.loaders'):  # pragma: no cover
                # drop the 'celery.loaders' prefix (14 characters)
                loader = loader[14:]
            appr += f' ({loader})'
        if self.autoscale:
            upper, lower = self.autoscale
            concurrency = f'{{min={lower}, max={upper}}}'
        pool = self.pool_cls
        if not isinstance(pool, str):
            pool = pool.__module__
        concurrency += f" ({pool.split('.')[-1]})"
        if self.task_events:
            events = 'ON'
        else:
            events = 'OFF (enable -E to monitor tasks in this worker)'

        banner = BANNER.format(
            app=appr,
            hostname=safe_str(self.hostname),
            timestamp=datetime.now().replace(microsecond=0),
            version=VERSION_BANNER,
            conninfo=self.app.connection().as_uri(),
            results=self.app.backend.as_uri(),
            concurrency=concurrency,
            platform=safe_str(_platform.platform()),
            events=events,
            queues=app.amqp.queues.format(indent=0, indent_first=False),
        ).splitlines()

        # integrate the ASCII art.
        if artlines:
            art_height = len(ARTLINES)
            for index, line in enumerate(banner):
                if index < art_height:
                    banner[index] = ' '.join([ARTLINES[index], line])
                else:
                    # past the end of the art: pad instead
                    banner[index] = ' ' * 16 + line
        return '\n'.join(banner) + '\n'

    def install_platform_tweaks(self, worker):
        """Install platform specific tweaks and workarounds."""
        on_macos = self.app.IS_macOS
        if on_macos:
            self.macOS_proxy_detection_workaround()

        # Install signal handler so SIGHUP restarts the worker.
        if not self._isatty:
            # only install HUP handler if detached from terminal,
            # so closing the terminal window doesn't restart the worker
            # into the background.
            if on_macos:
                # macOS can't exec from a process using threads.
                # See https://github.com/celery/celery/issues#issue/152
                install_HUP_not_supported_handler(worker)
            else:
                install_worker_restart_handler(worker)
        install_worker_term_handler(worker)
        install_worker_term_hard_handler(worker)
        install_worker_int_handler(worker)
        install_cry_handler()
        install_rdb_handler()

    def macOS_proxy_detection_workaround(self):
        """See https://github.com/celery/celery/issues#issue/161."""
        os.environ.setdefault('celery_dummy_proxy', 'set_by_celeryd')

    def set_process_status(self, info):
        """Set the process title to ``celeryd`` with ``info`` and the command line."""
        title_info = f'{info} ({platforms.strargv(sys.argv)})'
        return platforms.set_mp_process_title(
            'celeryd', info=title_info, hostname=self.hostname,
        )
def _shutdown_handler(worker, sig='TERM', how='Warm',
                      callback=None, exitcode=EX_OK):
    """Register a handler for ``sig`` that requests a warm or cold shutdown.

    The handler stores ``exitcode`` in ``state.should_stop`` (``how='Warm'``)
    or ``state.should_terminate`` (``how='Cold'``).  In the process named
    ``MainProcess`` it first runs ``callback(worker)`` if given, prints a
    notice and sends the ``worker_shutting_down`` signal.
    """
    def _handle_request(*args):
        with in_sighandler():
            from celery.worker import state
            in_main_process = current_process()._name == 'MainProcess'
            if in_main_process:
                if callback:
                    callback(worker)
                safe_say(f'worker: {how} shutdown (MainProcess)')
                signals.worker_shutting_down.send(
                    sender=worker.hostname, sig=sig, how=how,
                    exitcode=exitcode,
                )
            state_flag = {'Warm': 'should_stop',
                          'Cold': 'should_terminate'}[how]
            setattr(state, state_flag, exitcode)

    _handle_request.__name__ = str(f'worker_{how}')
    platforms.signals[sig] = _handle_request
# SIGTERM requests a cold shutdown with a failure exit code when
# REMAP_SIGTERM is "SIGQUIT", and a warm shutdown otherwise.
install_worker_term_handler = (
    partial(_shutdown_handler, sig='SIGTERM', how='Cold', exitcode=EX_FAILURE)
    if REMAP_SIGTERM == "SIGQUIT"
    else partial(_shutdown_handler, sig='SIGTERM', how='Warm')
)

if is_jython:  # pragma: no cover
    # On Jython both installers are replaced by no-ops.
    install_worker_term_handler = \
        install_worker_term_hard_handler = lambda *a, **kw: None
else:  # pragma: no cover
    install_worker_term_hard_handler = partial(
        _shutdown_handler, sig='SIGQUIT', how='Cold',
        exitcode=EX_FAILURE,
    )
def on_SIGINT(worker):
    """Warn the user and re-bind ``SIGINT`` to the cold shutdown handler."""
    warning = 'worker: Hitting Ctrl+C again will terminate all running tasks!'
    safe_say(warning)
    install_worker_term_hard_handler(worker, sig='SIGINT')
if is_jython:  # pragma: no cover
    def install_worker_int_handler(*args, **kwargs):
        """No-op installer used on Jython."""
        pass
else:  # pragma: no cover
    # First SIGINT: run on_SIGINT, then request a warm shutdown with a
    # failure exit code.
    install_worker_int_handler = partial(
        _shutdown_handler, sig='SIGINT', callback=on_SIGINT,
        exitcode=EX_FAILURE,
    )
def _reload_current_worker():
    """Replace the current process with a fresh run of the same command line.

    The three original standard streams are passed to
    ``platforms.close_open_fds`` before the ``exec``.
    """
    std_streams = [sys.__stdin__, sys.__stdout__, sys.__stderr__]
    platforms.close_open_fds(std_streams)
    new_argv = [sys.executable] + sys.argv
    os.execv(sys.executable, new_argv)
def install_worker_restart_handler(worker, sig='SIGHUP'):
    """Register a handler for ``sig`` that restarts the worker program.

    The handler schedules ``_reload_current_worker`` with ``atexit`` and
    sets ``state.should_stop`` to ``EX_OK``.
    """

    def restart_worker_sig_handler(*args):
        """Signal handler restarting the current python program."""
        set_in_sighandler(True)
        command_line = ' '.join(sys.argv)
        safe_say(f"Restarting celery worker ({command_line})")
        import atexit
        atexit.register(_reload_current_worker)
        from celery.worker import state
        state.should_stop = EX_OK

    platforms.signals[sig] = restart_worker_sig_handler
def install_cry_handler(sig='SIGUSR1'):
    """Register a handler for ``sig`` that prints what ``cry()`` returns; no-op on PyPy."""
    # PyPy does not have sys._current_frames
    if not is_pypy:
        def cry_handler(*args):
            """Signal handler logging the stack-trace of all active threads."""
            with in_sighandler():
                safe_say(cry())

        platforms.signals[sig] = cry_handler
    # pragma: no cover (PyPy branch installs nothing)
def install_rdb_handler(envvar='CELERY_RDBSIG',
                        sig='SIGUSR2'):  # pragma: no cover
    """Register an rdb breakpoint handler for ``sig`` when ``envvar`` is set and non-empty."""

    def rdb_handler(*args):
        """Signal handler setting a rdb breakpoint at the current frame."""
        with in_sighandler():
            from celery.contrib.rdb import _frame, set_trace

            # gevent does not pass standard signal handler args
            if args:
                frame = args[1]
            else:
                frame = _frame().f_back
            set_trace(frame)

    if not os.environ.get(envvar):
        return
    platforms.signals[sig] = rdb_handler
def install_HUP_not_supported_handler(worker, sig='SIGHUP'):
    """Register a handler for ``sig`` that only prints a "not supported" warning."""

    def warn_on_HUP_handler(signum, frame):
        with in_sighandler():
            warning = ('{sig} not supported: Restarting with {sig} is '
                       'unstable on this platform!'.format(sig=sig))
            safe_say(warning)

    platforms.signals[sig] = warn_on_HUP_handler

View File

@@ -0,0 +1 @@
"""Result Backends."""

View File

@@ -0,0 +1,190 @@
"""ArangoDb result store backend."""
# pylint: disable=W1202,W0703
from datetime import timedelta
from kombu.utils.objects import cached_property
from kombu.utils.url import _parse_url
from celery.exceptions import ImproperlyConfigured
from .base import KeyValueStoreBackend
try:
from pyArango import connection as py_arango_connection
from pyArango.theExceptions import AQLQueryError
except ImportError:
py_arango_connection = AQLQueryError = None
__all__ = ('ArangoDbBackend',)
class ArangoDbBackend(KeyValueStoreBackend):
    """ArangoDb backend.

    Sample url
    "arangodb://username:password@host:port/database/collection"
    *arangodb_backend_settings* is where the settings are present
    (in the app.conf)
    Settings should contain the host, port, username, password, database name,
    collection name else the default will be chosen.
    Default database name and collection name is celery.

    The url path may also name only the database
    ("arangodb://host:port/database"); the collection then comes from the
    settings or the default.

    Raises
    ------
    celery.exceptions.ImproperlyConfigured:
        if module :pypi:`pyArango` is not available.

    """

    host = '127.0.0.1'
    port = '8529'
    database = 'celery'
    collection = 'celery'
    username = None
    password = None
    # protocol is not supported in backend url (http is taken as default)
    http_protocol = 'http'
    verify = False

    # Use str as arangodb key not bytes
    key_t = str

    def __init__(self, url=None, *args, **kwargs):
        """Parse the url or load the settings from settings object."""
        super().__init__(*args, **kwargs)

        if py_arango_connection is None:
            raise ImproperlyConfigured(
                'You need to install the pyArango library to use the '
                'ArangoDb backend.',
            )

        self.url = url

        if url is None:
            host = port = database = collection = username = password = None
        else:
            (
                _schema, host, port, username, password,
                database_collection, _query
            ) = _parse_url(url)
            if database_collection is None:
                database = collection = None
            else:
                # Accept "database" as well as "database/collection"; an
                # empty part falls back to the settings/defaults below
                # instead of failing the tuple unpacking.
                database, _, collection = database_collection.partition('/')
                if '/' in collection:
                    # More than two path segments were always rejected.
                    raise ValueError(
                        'ArangoDb backend url path must be '
                        f'"database/collection", got {database_collection!r}'
                    )

        config = self.app.conf.get('arangodb_backend_settings', None)
        if config is not None:
            if not isinstance(config, dict):
                raise ImproperlyConfigured(
                    'ArangoDb backend settings should be grouped in a dict',
                )
        else:
            config = {}

        # Precedence: url component, then settings dict, then class default.
        self.host = host or config.get('host', self.host)
        self.port = int(port or config.get('port', self.port))
        self.http_protocol = config.get('http_protocol', self.http_protocol)
        self.verify = config.get('verify', self.verify)
        self.database = database or config.get('database', self.database)
        self.collection = \
            collection or config.get('collection', self.collection)
        self.username = username or config.get('username', self.username)
        self.password = password or config.get('password', self.password)
        self.arangodb_url = "{http_protocol}://{host}:{port}".format(
            http_protocol=self.http_protocol, host=self.host, port=self.port
        )
        self._connection = None

    @property
    def connection(self):
        """Connect to the arangodb server."""
        if self._connection is None:
            self._connection = py_arango_connection.Connection(
                arangoURL=self.arangodb_url, username=self.username,
                password=self.password, verify=self.verify
            )
        return self._connection

    @property
    def db(self):
        """Database Object to the given database."""
        return self.connection[self.database]

    @cached_property
    def expires_delta(self):
        """``self.expires`` as a timedelta (zero when ``expires`` is None)."""
        return timedelta(seconds=0 if self.expires is None else self.expires)

    def get(self, key):
        """Return the ``task`` field stored under ``key``, or None."""
        if key is None:
            return None
        query = self.db.AQLQuery(
            "RETURN DOCUMENT(@@collection, @key).task",
            rawResults=True,
            bindVars={
                "@collection": self.collection,
                "key": key,
            },
        )
        return next(query) if len(query) > 0 else None

    def set(self, key, value):
        """Insert or update the document ``key`` with ``value`` as its ``task`` field."""
        self.db.AQLQuery(
            "\n"
            "UPSERT {_key: @key}\n"
            "INSERT {_key: @key, task: @value}\n"
            "UPDATE {task: @value} IN @@collection\n",
            bindVars={
                "@collection": self.collection,
                "key": key,
                "value": value,
            },
        )

    def mget(self, keys):
        """Yield the ``task`` field for each of ``keys``, batch by batch."""
        if keys is None:
            return
        query = self.db.AQLQuery(
            "FOR k IN @keys RETURN DOCUMENT(@@collection, k).task",
            rawResults=True,
            bindVars={
                "@collection": self.collection,
                "keys": keys if isinstance(keys, list) else list(keys),
            },
        )
        while True:
            yield from query
            try:
                # nextBatch signals the end of the result set with
                # StopIteration, which is caught right here.
                query.nextBatch()
            except StopIteration:
                break

    def delete(self, key):
        """Remove the document stored under ``key``; a None key is ignored."""
        if key is None:
            return
        self.db.AQLQuery(
            "REMOVE {_key: @key} IN @@collection",
            bindVars={
                "@collection": self.collection,
                "key": key,
            },
        )

    def cleanup(self):
        """Remove records whose ``task.date_done`` is older than the expiry window."""
        if not self.expires:
            return
        checkpoint = (self.app.now() - self.expires_delta).isoformat()
        self.db.AQLQuery(
            "\n"
            "FOR record IN @@collection\n"
            "FILTER record.task.date_done < @checkpoint\n"
            "REMOVE record IN @@collection\n",
            bindVars={
                "@collection": self.collection,
                "checkpoint": checkpoint,
            },
        )

View File

@@ -0,0 +1,333 @@
"""Async I/O backend support utilities."""
import socket
import threading
import time
from collections import deque
from queue import Empty
from time import sleep
from weakref import WeakKeyDictionary
from kombu.utils.compat import detect_environment
from celery import states
from celery.exceptions import TimeoutError
from celery.utils.threads import THREAD_TIMEOUT_MAX
__all__ = (
'AsyncBackendMixin', 'BaseResultConsumer', 'Drainer',
'register_drainer',
)
drainers = {}
def register_drainer(name):
    """Decorator used to register a new result drainer type."""

    def _register(drainer_cls):
        # Record the class under ``name`` and hand it back unchanged.
        drainers.update({name: drainer_cls})
        return drainer_cls

    return _register
@register_drainer('default')
class Drainer:
    """Result draining service."""

    def __init__(self, result_consumer):
        self.result_consumer = result_consumer

    def start(self):
        """Nothing to start for the default drainer."""
        pass

    def stop(self):
        """Nothing to stop for the default drainer."""
        pass

    def drain_events_until(self, p, timeout=None, interval=1, on_interval=None, wait=None):
        """Generator draining events until ``p.ready`` is true.

        Yields once per wait round.  ``on_interval`` is called after every
        round.

        Raises:
            socket.timeout: once ``timeout`` seconds have elapsed in total.
        """
        if not wait:
            wait = self.result_consumer.drain_events
        time_start = time.monotonic()

        while True:
            # Total time spent may exceed a single call to wait()
            if timeout and time.monotonic() - time_start >= timeout:
                raise socket.timeout()
            try:
                yield self.wait_for(p, wait, timeout=interval)
            except socket.timeout:
                # A single round timing out is not an error.
                pass
            if on_interval:
                on_interval()
            if p.ready:  # got event on the wanted channel.
                return

    def wait_for(self, p, wait, timeout=None):
        """Run one wait round by calling ``wait(timeout=timeout)``."""
        wait(timeout=timeout)
class greenletDrainer(Drainer):
    """Drainer that runs the drain loop in a separately spawned callable.

    Subclasses provide ``spawn`` and the two ``_drain_complete_event``
    hooks.
    """

    spawn = None
    _g = None
    _drain_complete_event = None  # event, sent (and recreated) after every drain_events iteration

    def _create_drain_complete_event(self):
        """create new self._drain_complete_event object"""
        pass

    def _send_drain_complete_event(self):
        """raise self._drain_complete_event for wakeup .wait_for"""
        pass

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._started = threading.Event()
        self._stopped = threading.Event()
        self._shutdown = threading.Event()
        self._create_drain_complete_event()

    def run(self):
        """Drain events until :meth:`stop` is called.

        ``_shutdown`` is set when the loop ends for any reason.
        """
        self._started.set()
        try:
            while not self._stopped.is_set():
                try:
                    self.result_consumer.drain_events(timeout=1)
                    self._send_drain_complete_event()
                    self._create_drain_complete_event()
                except socket.timeout:
                    pass
        finally:
            # Always signal shutdown, even when drain_events raised
            # something other than a timeout; otherwise stop() would
            # block for the full THREAD_TIMEOUT_MAX.
            self._shutdown.set()

    def start(self):
        """Spawn :meth:`run` once and wait until it has started."""
        if not self._started.is_set():
            self._g = self.spawn(self.run)
            self._started.wait()

    def stop(self):
        """Ask the loop to stop and wait (bounded) for it to finish."""
        self._stopped.set()
        self._send_drain_complete_event()
        self._shutdown.wait(THREAD_TIMEOUT_MAX)

    def wait_for(self, p, wait, timeout=None):
        """Ensure the loop is running, then wait for the next completed drain unless ``p`` is ready."""
        self.start()
        if not p.ready:
            self._drain_complete_event.wait(timeout=timeout)
@register_drainer('eventlet')
class eventletDrainer(greenletDrainer):
    """Drainer registered under ``'eventlet'``, built on eventlet primitives."""

    def spawn(self, func):
        """Spawn ``func`` with eventlet and yield control once."""
        import eventlet

        green_thread = eventlet.spawn(func)
        eventlet.sleep(0)
        return green_thread

    def _create_drain_complete_event(self):
        import eventlet.event

        self._drain_complete_event = eventlet.event.Event()

    def _send_drain_complete_event(self):
        self._drain_complete_event.send()
@register_drainer('gevent')
class geventDrainer(greenletDrainer):
    """Drainer registered under ``'gevent'``, built on gevent primitives."""

    def spawn(self, func):
        """Spawn ``func`` with gevent and yield control once."""
        from gevent import sleep, spawn

        greenlet = spawn(func)
        sleep(0)
        return greenlet

    def _create_drain_complete_event(self):
        import gevent.event

        self._drain_complete_event = gevent.event.Event()

    def _send_drain_complete_event(self):
        # Fire the current event, then replace it with a fresh one.
        self._drain_complete_event.set()
        self._create_drain_complete_event()
class AsyncBackendMixin:
    """Mixin for backends that enables the async API."""

    def _collect_into(self, result, bucket):
        """Ask the result consumer to put ``result`` into ``bucket`` once it is fulfilled."""
        self.result_consumer.buckets[result] = bucket

    def iter_native(self, result, no_ack=True, **kwargs):
        """Yield ``(id, value)`` pairs for the members of ``result.results``.

        ``value`` is the node's ``_cache``, or its ``children`` for nodes
        that have no ``_cache`` attribute.  Yields nothing when ``result``
        has no members.
        """
        self._ensure_not_eager()

        results = result.results
        if not results:
            # A plain return ends the generator; raising StopIteration here
            # would surface as RuntimeError (PEP 479).
            return

        def _payload(node):
            # Nodes without a `_cache` attribute expose `children` instead.
            if not hasattr(node, '_cache'):
                return node.id, node.children
            return node.id, node._cache

        # we tell the result consumer to put consumed results
        # into these buckets.
        bucket = deque()
        for node in results:
            if not hasattr(node, '_cache'):
                bucket.append(node)
            elif node._cache:
                bucket.append(node)
            else:
                self._collect_into(node, bucket)

        for _ in self._wait_for_pending(result, no_ack=no_ack, **kwargs):
            while bucket:
                yield _payload(bucket.popleft())
        # Drain whatever arrived after the last wait round, with the same
        # handling as above.
        while bucket:
            yield _payload(bucket.popleft())

    def add_pending_result(self, result, weak=False, start_drainer=True):
        """Resolve ``result`` from the message buffer, or register it as pending.

        Returns ``result``.
        """
        if start_drainer:
            self.result_consumer.drainer.start()
        try:
            self._maybe_resolve_from_buffer(result)
        except Empty:
            # Nothing buffered for this id yet: wait for it.
            self._add_pending_result(result.id, result, weak=weak)
        return result

    def _maybe_resolve_from_buffer(self, result):
        result._maybe_set_cache(self._pending_messages.take(result.id))

    def _add_pending_result(self, task_id, result, weak=False):
        concrete, weak_ = self._pending_results
        if task_id not in weak_ and result.id not in concrete:
            (weak_ if weak else concrete)[task_id] = result
            self.result_consumer.consume_from(task_id)

    def add_pending_results(self, results, weak=False):
        """Register several results, starting the drainer only once."""
        self.result_consumer.drainer.start()
        return [self.add_pending_result(result, weak=weak, start_drainer=False)
                for result in results]

    def remove_pending_result(self, result):
        """Forget ``result`` and stop consuming for it; returns ``result``."""
        self._remove_pending_result(result.id)
        self.on_result_fulfilled(result)
        return result

    def _remove_pending_result(self, task_id):
        for mapping in self._pending_results:
            mapping.pop(task_id, None)

    def on_result_fulfilled(self, result):
        self.result_consumer.cancel_for(result.id)

    def wait_for_pending(self, result,
                         callback=None, propagate=True, **kwargs):
        """Block until ``result`` is ready, then return ``result.maybe_throw(...)``."""
        self._ensure_not_eager()
        for _ in self._wait_for_pending(result, **kwargs):
            pass
        return result.maybe_throw(callback=callback, propagate=propagate)

    def _wait_for_pending(self, result,
                          timeout=None, on_interval=None, on_message=None,
                          **kwargs):
        return self.result_consumer._wait_for_pending(
            result, timeout=timeout,
            on_interval=on_interval, on_message=on_message,
            **kwargs
        )

    @property
    def is_async(self):
        return True
class BaseResultConsumer:
    """Manager responsible for consuming result messages."""

    def __init__(self, backend, app, accept,
                 pending_results, pending_messages):
        self.backend = backend
        self.app = app
        self.accept = accept
        self._pending_results = pending_results
        self._pending_messages = pending_messages
        self.on_message = None
        self.buckets = WeakKeyDictionary()
        # Pick the drainer class registered for the detected environment.
        drainer_cls = drainers[detect_environment()]
        self.drainer = drainer_cls(self)

    def start(self, initial_task_id, **kwargs):
        raise NotImplementedError()

    def stop(self):
        pass

    def drain_events(self, timeout=None):
        raise NotImplementedError()

    def consume_from(self, task_id):
        raise NotImplementedError()

    def cancel_for(self, task_id):
        raise NotImplementedError()

    def _after_fork(self):
        """Reset buckets and the message callback, then call :meth:`on_after_fork`."""
        self.buckets.clear()
        self.buckets = WeakKeyDictionary()
        self.on_message = None
        self.on_after_fork()

    def on_after_fork(self):
        pass

    def drain_events_until(self, p, timeout=None, on_interval=None):
        """Delegate to the drainer's ``drain_events_until``."""
        return self.drainer.drain_events_until(
            p, timeout=timeout, on_interval=on_interval)

    def _wait_for_pending(self, result,
                          timeout=None, on_interval=None, on_message=None,
                          **kwargs):
        """Generator that drains events until ``result`` is ready.

        ``on_message`` replaces ``self.on_message`` while waiting; the
        previous callback is restored afterwards.

        Raises:
            TimeoutError: when draining raises ``socket.timeout``.
        """
        self.on_wait_for_pending(result, timeout=timeout, **kwargs)
        previous_callback = self.on_message
        self.on_message = on_message
        try:
            rounds = self.drain_events_until(
                result.on_ready, timeout=timeout,
                on_interval=on_interval)
            for _ in rounds:
                yield
                sleep(0)
        except socket.timeout:
            raise TimeoutError('The operation timed out.')
        finally:
            self.on_message = previous_callback

    def on_wait_for_pending(self, result, timeout=None, **kwargs):
        pass

    def on_out_of_band_result(self, message):
        self.on_state_change(message.payload, message)

    def _get_pending_result(self, task_id):
        """Return the pending result registered for ``task_id``.

        Raises:
            KeyError: when no pending-results mapping contains ``task_id``.
        """
        for mapping in self._pending_results:
            try:
                return mapping[task_id]
            except KeyError:
                continue
        raise KeyError(task_id)

    def on_state_change(self, meta, message):
        """Handle a state message: notify, then resolve or buffer ready results."""
        if self.on_message:
            self.on_message(meta)
        if meta['status'] in states.READY_STATES:
            task_id = meta['task_id']
            try:
                result = self._get_pending_result(task_id)
            except KeyError:
                # send to buffer in case we received this result
                # before it was added to _pending_results.
                self._pending_messages.put(task_id, meta)
            else:
                result._maybe_set_cache(meta)
                # remove bucket for this result, since it's fulfilled
                bucket = self.buckets.pop(result, None)
                if bucket is not None:
                    # send to waiter via bucket
                    bucket.append(result)
        sleep(0)

View File

@@ -0,0 +1,165 @@
"""The Azure Storage Block Blob backend for Celery."""
from kombu.utils import cached_property
from kombu.utils.encoding import bytes_to_str
from celery.exceptions import ImproperlyConfigured
from celery.utils.log import get_logger
from .base import KeyValueStoreBackend
try:
import azure.storage.blob as azurestorage
from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError
from azure.storage.blob import BlobServiceClient
except ImportError:
azurestorage = None
__all__ = ("AzureBlockBlobBackend",)
LOGGER = get_logger(__name__)
# URL prefix that is stripped from a backend URL to obtain the Azure
# connection string, and prepended again when rendering the backend as a URI.
AZURE_BLOCK_BLOB_CONNECTION_PREFIX = 'azureblockblob://'
class AzureBlockBlobBackend(KeyValueStoreBackend):
    """Azure Storage Block Blob backend for Celery."""

    def __init__(self,
                 url=None,
                 container_name=None,
                 *args,
                 **kwargs):
        """Configure the backend from *url* and the app configuration.

        Args:
            url: ``azureblockblob://<connection string>`` URL.
            container_name: Container to use; falls back to the
                ``azureblockblob_container_name`` setting.

        Raises:
            ImproperlyConfigured: if azure-storage-blob v12+ cannot be
                imported, or *url* carries no connection string.
        """
        super().__init__(*args, **kwargs)

        if azurestorage is None or not self._is_supported_sdk(
                azurestorage.__version__):
            raise ImproperlyConfigured(
                "You need to install the azure-storage-blob v12 library to "
                "use the AzureBlockBlob backend")

        conf = self.app.conf

        self._connection_string = self._parse_url(url)

        self._container_name = (
            container_name or
            conf["azureblockblob_container_name"])

        self.base_path = conf.get('azureblockblob_base_path', '')
        self._connection_timeout = conf.get(
            'azureblockblob_connection_timeout', 20
        )
        self._read_timeout = conf.get('azureblockblob_read_timeout', 120)

    @staticmethod
    def _is_supported_sdk(version):
        """Return whether *version* is azure-storage-blob 12 or newer.

        The major component is compared numerically; comparing the raw
        strings would rank ``'2.1.0'`` above ``'12'``.  A version whose
        major component is not numeric is not rejected.
        """
        major = str(version).split('.', 1)[0]
        return not major.isdigit() or int(major) >= 12

    @classmethod
    def _parse_url(cls, url, prefix=AZURE_BLOCK_BLOB_CONNECTION_PREFIX):
        """Return the connection string that follows *prefix* in *url*.

        Raises:
            ImproperlyConfigured: if *url* is empty/``None`` or nothing
                follows the prefix.
        """
        connection_string = url[len(prefix):] if url else ''
        if not connection_string:
            raise ImproperlyConfigured("Invalid URL")

        return connection_string

    @cached_property
    def _blob_service_client(self):
        """Return the Azure Storage Blob service client.

        If this is the first call to the property, the client is created and
        the container is created if it doesn't yet exist.
        """
        client = BlobServiceClient.from_connection_string(
            self._connection_string,
            connection_timeout=self._connection_timeout,
            read_timeout=self._read_timeout
        )

        try:
            client.create_container(name=self._container_name)
            msg = f"Container created with name {self._container_name}."
        except ResourceExistsError:
            msg = (f"Container with name {self._container_name} already "
                   "exists. This will not be created.")
        LOGGER.info(msg)

        return client

    def _blob_client_for(self, key):
        """Return the blob client for *key* (a ``str``) under ``base_path``."""
        return self._blob_service_client.get_blob_client(
            container=self._container_name,
            blob=f'{self.base_path}{key}',
        )

    def get(self, key):
        """Read the value stored at the given key.

        Args:
            key: The key for which to read the value.

        Returns:
            The decoded blob content, or ``None`` if the blob is not found.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Getting Azure Block Blob %s/%s",
                     self._container_name, key)

        blob_client = self._blob_client_for(key)

        try:
            return blob_client.download_blob().readall().decode()
        except ResourceNotFoundError:
            return None

    def set(self, key, value):
        """Store a value for a given key.

        Args:
            key: The key at which to store the value.
            value: The value to store.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Creating azure blob at %s/%s",
                     self._container_name, key)

        blob_client = self._blob_client_for(key)

        blob_client.upload_blob(value, overwrite=True)

    def mget(self, keys):
        """Read all the values for the provided keys.

        Args:
            keys: The list of keys to read.
        """
        return [self.get(key) for key in keys]

    def delete(self, key):
        """Delete the value at a given key.

        Args:
            key: The key of the value to delete.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Deleting azure blob at %s/%s",
                     self._container_name, key)

        blob_client = self._blob_client_for(key)

        blob_client.delete_blob()

    def as_uri(self, include_password=False):
        """Return the backend URI.

        Args:
            include_password: When false, the value of any ``AccountKey=``
                part of the connection string is replaced with ``**``.
        """
        if include_password:
            return (
                f'{AZURE_BLOCK_BLOB_CONNECTION_PREFIX}'
                f'{self._connection_string}'
            )

        connection_string_parts = self._connection_string.split(';')
        account_key_prefix = 'AccountKey='
        redacted_connection_string_parts = [
            f'{account_key_prefix}**' if part.startswith(account_key_prefix)
            else part
            for part in connection_string_parts
        ]

        return (
            f'{AZURE_BLOCK_BLOB_CONNECTION_PREFIX}'
            f'{";".join(redacted_connection_string_parts)}'
        )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,163 @@
"""Memcached and in-memory cache result backend."""
from kombu.utils.encoding import bytes_to_str, ensure_bytes
from kombu.utils.objects import cached_property
from celery.exceptions import ImproperlyConfigured
from celery.utils.functional import LRUCache
from .base import KeyValueStoreBackend
__all__ = ('CacheBackend',)
# One-slot cache holding the tuple built by import_best_memcache();
# ``None`` until the first successful import.
_imp = [None]
# Raised (as ImproperlyConfigured) when neither memcached client imports.
REQUIRES_BACKEND = """\
The Memcached backend requires either pylibmc or python-memcached.\
"""
# Raised (as ImproperlyConfigured) for a backend name missing from
# ``backends``; formatted with the offending name and the known names.
UNKNOWN_BACKEND = """\
The cache backend {0!r} is unknown,
Please use one of the following backends instead: {1}\
"""
# Global shared in-memory cache for in-memory cache client
# This is to share cache between threads
_DUMMY_CLIENT_CACHE = LRUCache(limit=5000)
def import_best_memcache():
    """Import a memcached client module, preferring pylibmc.

    The outcome is memoized in ``_imp`` so the import is attempted once.

    Returns:
        tuple: ``(is_pylibmc, module, key_type)``.

    Raises:
        ImproperlyConfigured: if neither ``pylibmc`` nor ``memcache``
            can be imported.
    """
    cached = _imp[0]
    if cached is not None:
        return cached
    try:
        import pylibmc as memcache
    except ImportError:
        using_pylibmc = False
        try:
            import memcache
        except ImportError:
            raise ImproperlyConfigured(REQUIRES_BACKEND)
    else:
        using_pylibmc = True
    _imp[0] = (using_pylibmc, memcache, bytes_to_str)
    return _imp[0]
def get_best_memcache(*args, **kwargs):
    """Return ``(Client, key_type)`` for the best available memcached module.

    When the module is not pylibmc, the returned client factory discards
    the ``behaviors`` keyword before calling the module's ``Client``.
    """
    # pylint: disable=unpacking-non-sequence
    # This is most definitely a sequence, but pylint thinks it's not.
    is_pylibmc, memcache, key_t = import_best_memcache()
    native_client = memcache.Client
    if is_pylibmc:
        return native_client, key_t

    def Client(*args, **kwargs):  # noqa: F811
        kwargs.pop('behaviors', None)
        return native_client(*args, **kwargs)

    return Client, key_t
class DummyClient:
    """In-memory stand-in for a memcached client.

    All instances share the module-level ``_DUMMY_CLIENT_CACHE``.
    """

    def __init__(self, *args, **kwargs):
        """Bind to the shared cache; all arguments are ignored."""
        self.cache = _DUMMY_CLIENT_CACHE

    def get(self, key, *args, **kwargs):
        """Return the value stored at *key* via the cache's ``get``."""
        store = self.cache
        return store.get(key)

    def get_multi(self, keys):
        """Return a dict of the *keys* present in the cache."""
        store = self.cache
        found = {}
        for name in keys:
            if name in store:
                found[name] = store[name]
        return found

    def set(self, key, value, *args, **kwargs):
        """Store *value* at *key*; extra arguments are ignored."""
        store = self.cache
        store[key] = value

    def delete(self, key, *args, **kwargs):
        """Remove *key* if present; extra arguments are ignored."""
        store = self.cache
        store.pop(key, None)

    def incr(self, key, delta=1):
        """Delegate to the cache's ``incr`` and return its result."""
        store = self.cache
        return store.incr(key, delta)

    def touch(self, key, expire):
        """Accepted for interface compatibility; does nothing."""
# Maps a backend name (the URL scheme) to a zero-argument factory that
# returns a ``(Client, key_type)`` pair.
backends = {
'memcache': get_best_memcache,
'memcached': get_best_memcache,
'pylibmc': get_best_memcache,
'memory': lambda: (DummyClient, ensure_bytes),
}
class CacheBackend(KeyValueStoreBackend):
    """Cache result backend."""

    servers = None
    supports_autoexpire = True
    supports_native_join = True
    implements_incr = True

    def __init__(self, app, expires=None, backend=None,
                 options=None, url=None, **kwargs):
        """Resolve the client class from *url*, *backend* or configuration.

        Args:
            app: The Celery app.
            expires: Expiry, normalized through ``prepare_expires``.
            backend: ``name://server;server/`` string, used if *url* is unset.
            options: Client options merged over ``cache_backend_options``.
            url: Same format as *backend*; takes precedence over it.

        Raises:
            ImproperlyConfigured: if the backend name is not registered
                in ``backends``.
        """
        options = options or {}
        super().__init__(app, **kwargs)
        self.url = url

        conf = self.app.conf
        self.options = dict(conf.cache_backend_options, **options)

        self.backend = url or backend or conf.cache_backend
        if self.backend:
            name, _, server_part = self.backend.partition('://')
            self.backend = name
            self.servers = server_part.rstrip('/').split(';')
        self.expires = self.prepare_expires(expires, type=int)
        try:
            self.Client, self.key_t = backends[self.backend]()
        except KeyError:
            known = ', '.join(backends)
            raise ImproperlyConfigured(
                UNKNOWN_BACKEND.format(self.backend, known))
        self._encode_prefixes()  # rencode the keyprefixes

    def get(self, key):
        """Return the value stored at *key*."""
        return self.client.get(key)

    def mget(self, keys):
        """Return the client's ``get_multi`` result for *keys*."""
        return self.client.get_multi(keys)

    def set(self, key, value):
        """Store *value* at *key* with the configured expiry."""
        return self.client.set(key, value, self.expires)

    def delete(self, key):
        """Delete *key*."""
        return self.client.delete(key)

    def _apply_chord_incr(self, header_result_args, body, **kwargs):
        """Zero the chord counter key, then defer to the parent class."""
        group_id = header_result_args[0]
        chord_key = self.get_key_for_chord(group_id)
        self.client.set(chord_key, 0, time=self.expires)
        return super()._apply_chord_incr(
            header_result_args, body, **kwargs)

    def incr(self, key):
        """Increment *key* through the client."""
        return self.client.incr(key)

    def expire(self, key, value):
        """Call the client's ``touch`` for *key* with *value*."""
        return self.client.touch(key, value)

    @cached_property
    def client(self):
        """Client instance built from the servers and options."""
        return self.Client(self.servers, **self.options)

    def __reduce__(self, args=(), kwargs=None):
        """Pickle support: carry backend URL, expiry and options."""
        kwargs = kwargs or {}
        server_list = ';'.join(self.servers)
        kwargs.update({
            'backend': f'{self.backend}://{server_list}/',
            'expires': self.expires,
            'options': self.options,
        })
        return super().__reduce__(args, kwargs)

    def as_uri(self, *args, **kwargs):
        """Return the backend as an URI.

        This properly handles the case of multiple servers.
        """
        server_list = ';'.join(self.servers)
        return f'{self.backend}://{server_list}/'

View File

@@ -0,0 +1,256 @@
"""Apache Cassandra result store backend using the DataStax driver."""
import threading
from celery import states
from celery.exceptions import ImproperlyConfigured
from celery.utils.log import get_logger
from .base import BaseBackend
try: # pragma: no cover
import cassandra
import cassandra.auth
import cassandra.cluster
import cassandra.query
except ImportError:
cassandra = None
__all__ = ('CassandraBackend',)
logger = get_logger(__name__)
# Raised (as ImproperlyConfigured) when the cassandra driver is not importable.
E_NO_CASSANDRA = """
You need to install the cassandra-driver library to
use the Cassandra backend. See https://github.com/datastax/python-driver
"""
# Raised when the configured auth provider name is not found in cassandra.auth.
E_NO_SUCH_CASSANDRA_AUTH_PROVIDER = """
CASSANDRA_AUTH_PROVIDER you provided is not a valid auth_provider class.
See https://datastax.github.io/python-driver/api/cassandra/auth.html.
"""
# Raised when both servers and a secure bundle path are configured.
E_CASSANDRA_MISCONFIGURED = 'Cassandra backend improperly configured.'
# Raised when servers/bundle path, keyspace or table is missing.
E_CASSANDRA_NOT_CONFIGURED = 'Cassandra backend not configured.'
# CQL templates: ``{table}`` / ``{expires}`` are filled with str.format,
# while the ``%s`` placeholders are left for the driver to bind.
Q_INSERT_RESULT = """
INSERT INTO {table} (
task_id, status, result, date_done, traceback, children) VALUES (
%s, %s, %s, %s, %s, %s) {expires};
"""
Q_SELECT_RESULT = """
SELECT status, result, date_done, traceback, children
FROM {table}
WHERE task_id=%s
LIMIT 1
"""
Q_CREATE_RESULT_TABLE = """
CREATE TABLE {table} (
task_id text,
status text,
result blob,
date_done timestamp,
traceback blob,
children blob,
PRIMARY KEY ((task_id), date_done)
) WITH CLUSTERING ORDER BY (date_done DESC);
"""
# TTL clause substituted for ``{expires}`` in Q_INSERT_RESULT.
Q_EXPIRES = """
USING TTL {0}
"""
def buf_t(x):
    """Return the string *x* encoded as UTF-8 bytes."""
    encoded = bytes(x, 'utf8')
    return encoded
class CassandraBackend(BaseBackend):
    """Cassandra/AstraDB backend utilizing DataStax driver.

    Raises:
        celery.exceptions.ImproperlyConfigured:
            if module :pypi:`cassandra-driver` is not available,
            or not-exactly-one of the :setting:`cassandra_servers` and
            the :setting:`cassandra_secure_bundle_path` settings is set.
    """

    #: List of Cassandra servers with format: ``hostname``.
    servers = None
    #: Location of the secure connect bundle zipfile (absolute path).
    bundle_path = None

    supports_autoexpire = True      # autoexpire supported via entry_ttl

    def __init__(self, servers=None, keyspace=None, table=None, entry_ttl=None,
                 port=9042, bundle_path=None, **kwargs):
        """Read connection settings from the arguments or the app config.

        Explicit arguments win over the matching ``cassandra_*`` settings.

        Raises:
            ImproperlyConfigured: if the driver is missing, required
                settings are absent, both servers and bundle path are
                given, or the auth provider name is unknown.
        """
        super().__init__(**kwargs)

        if not cassandra:
            raise ImproperlyConfigured(E_NO_CASSANDRA)

        conf = self.app.conf
        self.servers = servers or conf.get('cassandra_servers', None)
        self.bundle_path = bundle_path or conf.get(
            'cassandra_secure_bundle_path', None)
        # NOTE(review): with the 9042 default, ``cassandra_port`` is only
        # consulted when a falsy port is passed explicitly - confirm intent.
        self.port = port or conf.get('cassandra_port', None)
        self.keyspace = keyspace or conf.get('cassandra_keyspace', None)
        self.table = table or conf.get('cassandra_table', None)
        self.cassandra_options = conf.get('cassandra_options', {})

        # either servers or bundle path must be provided...
        db_directions = self.servers or self.bundle_path
        if not db_directions or not self.keyspace or not self.table:
            raise ImproperlyConfigured(E_CASSANDRA_NOT_CONFIGURED)
        # ...but not both:
        if self.servers and self.bundle_path:
            raise ImproperlyConfigured(E_CASSANDRA_MISCONFIGURED)

        expires = entry_ttl or conf.get('cassandra_entry_ttl', None)

        self.cqlexpires = (
            Q_EXPIRES.format(expires) if expires is not None else '')

        read_cons = conf.get('cassandra_read_consistency') or 'LOCAL_QUORUM'
        write_cons = conf.get('cassandra_write_consistency') or 'LOCAL_QUORUM'

        # Unknown consistency names fall back to LOCAL_QUORUM.
        self.read_consistency = getattr(
            cassandra.ConsistencyLevel, read_cons,
            cassandra.ConsistencyLevel.LOCAL_QUORUM)
        self.write_consistency = getattr(
            cassandra.ConsistencyLevel, write_cons,
            cassandra.ConsistencyLevel.LOCAL_QUORUM)

        self.auth_provider = None
        auth_provider = conf.get('cassandra_auth_provider', None)
        auth_kwargs = conf.get('cassandra_auth_kwargs', None)
        if auth_provider and auth_kwargs:
            auth_provider_class = getattr(cassandra.auth, auth_provider, None)
            if not auth_provider_class:
                raise ImproperlyConfigured(E_NO_SUCH_CASSANDRA_AUTH_PROVIDER)
            self.auth_provider = auth_provider_class(**auth_kwargs)

        self._cluster = None
        self._session = None
        self._write_stmt = None
        self._read_stmt = None
        self._lock = threading.RLock()

    def _get_connection(self, write=False):
        """Prepare the connection for action.

        Arguments:
            write (bool): are we a writer?
        """
        if self._session is not None:
            return
        # The context manager releases the lock on every exit path,
        # including the re-raise in the timeout handler below.
        with self._lock:
            try:
                if self._session is not None:
                    # Another caller connected while we waited for the lock.
                    return
                # using either 'servers' or 'bundle_path' here:
                if self.servers:
                    self._cluster = cassandra.cluster.Cluster(
                        self.servers, port=self.port,
                        auth_provider=self.auth_provider,
                        **self.cassandra_options)
                else:
                    # 'bundle_path' is guaranteed to be set
                    self._cluster = cassandra.cluster.Cluster(
                        cloud={
                            'secure_connect_bundle': self.bundle_path,
                        },
                        auth_provider=self.auth_provider,
                        **self.cassandra_options)
                self._session = self._cluster.connect(self.keyspace)

                # Only {table}/{expires} are substituted here; the %s
                # placeholders are left for Cassandra to process.
                self._write_stmt = cassandra.query.SimpleStatement(
                    Q_INSERT_RESULT.format(
                        table=self.table, expires=self.cqlexpires),
                )
                self._write_stmt.consistency_level = self.write_consistency

                self._read_stmt = cassandra.query.SimpleStatement(
                    Q_SELECT_RESULT.format(table=self.table),
                )
                self._read_stmt.consistency_level = self.read_consistency

                if write:
                    # Only possible writers "workers" are allowed to issue
                    # CREATE TABLE.  This is to prevent conflicting situations
                    # where both task-creator and task-executor would issue it
                    # at the same time.

                    # Anyway; if you're doing anything critical, you should
                    # have created this table in advance, in which case
                    # this query will be a no-op (AlreadyExists)
                    make_stmt = cassandra.query.SimpleStatement(
                        Q_CREATE_RESULT_TABLE.format(table=self.table),
                    )
                    make_stmt.consistency_level = self.write_consistency

                    try:
                        self._session.execute(make_stmt)
                    except cassandra.AlreadyExists:
                        pass

            except cassandra.OperationTimedOut:
                # a heavily loaded or gone Cassandra cluster failed to respond.
                # leave this class in a consistent state
                if self._cluster is not None:
                    self._cluster.shutdown()     # also shuts down _session

                self._cluster = None
                self._session = None
                raise   # we did fail after all - reraise

    def _store_result(self, task_id, result, state,
                      traceback=None, request=None, **kwargs):
        """Store return value and state of an executed task."""
        self._get_connection(write=True)

        self._session.execute(self._write_stmt, (
            task_id,
            state,
            buf_t(self.encode(result)),
            self.app.now(),
            buf_t(self.encode(traceback)),
            buf_t(self.encode(self.current_task_children(request)))
        ))

    def as_uri(self, include_password=True):
        """Return the fixed string ``'cassandra://'``."""
        return 'cassandra://'

    def _get_task_meta_for(self, task_id):
        """Get task meta-data for a task by id."""
        self._get_connection()

        res = self._session.execute(self._read_stmt, (task_id, )).one()
        if not res:
            # No row stored for this id: report it as pending.
            return {'status': states.PENDING, 'result': None}

        status, result, date_done, traceback, children = res

        return self.meta_from_decoded({
            'task_id': task_id,
            'status': status,
            'result': self.decode(result),
            'date_done': date_done,
            'traceback': self.decode(traceback),
            'children': self.decode(children),
        })

    def __reduce__(self, args=(), kwargs=None):
        """Pickle support: carry servers, keyspace and table."""
        # NOTE(review): port, bundle_path and the TTL are not carried over;
        # confirm whether they are expected to come from the app config.
        kwargs = {} if not kwargs else kwargs
        kwargs.update(
            {'servers': self.servers,
             'keyspace': self.keyspace,
             'table': self.table})
        return super().__reduce__(args, kwargs)

View File

@@ -0,0 +1,116 @@
"""Consul result store backend.
- :class:`ConsulBackend` implements KeyValueStoreBackend to store results
in the key-value store of Consul.
"""
from kombu.utils.encoding import bytes_to_str
from kombu.utils.url import parse_url
from celery.backends.base import KeyValueStoreBackend
from celery.exceptions import ImproperlyConfigured
from celery.utils.log import get_logger
try:
import consul
except ImportError:
consul = None
logger = get_logger(__name__)
__all__ = ('ConsulBackend',)
# Raised (as ImproperlyConfigured) when the ``consul`` module is not importable.
CONSUL_MISSING = """\
You need to install the python-consul library in order to use \
the Consul result store backend."""
class ConsulBackend(KeyValueStoreBackend):
    """Consul.io K/V store backend for Celery."""

    # Module reference kept on the class; ``None`` when the import failed.
    consul = consul

    supports_autoexpire = True

    consistency = 'consistent'
    path = None

    def __init__(self, *args, **kwargs):
        """Initialise the backend from ``self.url``.

        Raises:
            ImproperlyConfigured: if the consul library is not available.
        """
        super().__init__(*args, **kwargs)

        if self.consul is None:
            raise ImproperlyConfigured(CONSUL_MISSING)
        #
        # By default, for correctness, we use a client connection per
        # operation. If set, self.one_client will be used for all operations.
        # This provides for the original behaviour to be selected, and is
        # also convenient for mocking in the unit tests.
        #
        self.one_client = None
        self._init_from_params(**parse_url(self.url))

    def _init_from_params(self, hostname, port, virtual_host, **params):
        """Store host, port and key path taken from the parsed URL."""
        # %s rather than %d: the port may be None when the URL omits it.
        logger.debug('Setting on Consul client to connect to %s:%s',
                     hostname, port)
        self.path = virtual_host
        self.hostname = hostname
        self.port = port
        #
        # Optionally, allow a single client connection to be used to reduce
        # the connection load on Consul by adding a "one_client=1" parameter
        # to the URL.
        #
        if params.get('one_client', None):
            self.one_client = self.client()

    def client(self):
        """Return the shared client if set, otherwise a new Consul client."""
        # Use the class-level module reference (the one __init__ validated)
        # so an overridden ``consul`` attribute is honoured.
        return self.one_client or self.consul.Consul(
            host=self.hostname,
            port=self.port,
            consistency=self.consistency)

    def _key_to_consul_key(self, key):
        """Return *key* as ``str``, prefixed with ``path/`` when a path is set."""
        key = bytes_to_str(key)
        return key if self.path is None else f'{self.path}/{key}'

    def get(self, key):
        """Return the stored value for *key*, or ``None`` if unavailable."""
        key = self._key_to_consul_key(key)
        logger.debug('Trying to fetch key %s from Consul', key)
        try:
            _, data = self.client().kv.get(key)
            return data['Value']
        except TypeError:
            # Subscripting fails when no data came back for the key.
            return None

    def mget(self, keys):
        """Lazily yield the value of each key in *keys*."""
        for key in keys:
            yield self.get(key)

    def set(self, key, value):
        """Set a key in Consul.

        Before creating the key it will create a session inside Consul
        where it creates a session with a TTL

        The key created afterwards will reference to the session's ID.

        If the session expires it will remove the key so that results
        can auto expire from the K/V store
        """
        session_name = bytes_to_str(key)

        key = self._key_to_consul_key(key)

        # %s rather than %d: ``expires`` is not guaranteed to be an int here.
        logger.debug('Trying to create Consul session %s with TTL %s',
                     session_name, self.expires)
        client = self.client()
        session_id = client.session.create(name=session_name,
                                           behavior='delete',
                                           ttl=self.expires)
        logger.debug('Created Consul session %s', session_id)

        logger.debug('Writing key %s to Consul', key)
        return client.kv.put(key=key, value=value, acquire=session_id)

    def delete(self, key):
        """Delete *key* from the Consul K/V store."""
        key = self._key_to_consul_key(key)
        logger.debug('Removing key %s from Consul', key)
        return self.client().kv.delete(key)

View File

@@ -0,0 +1,218 @@
"""The CosmosDB/SQL backend for Celery (experimental)."""
from kombu.utils import cached_property
from kombu.utils.encoding import bytes_to_str
from kombu.utils.url import _parse_url
from celery.exceptions import ImproperlyConfigured
from celery.utils.log import get_logger
from .base import KeyValueStoreBackend
try:
import pydocumentdb
from pydocumentdb.document_client import DocumentClient
from pydocumentdb.documents import ConnectionPolicy, ConsistencyLevel, PartitionKind
from pydocumentdb.errors import HTTPFailure
from pydocumentdb.retry_options import RetryOptions
except ImportError:
pydocumentdb = DocumentClient = ConsistencyLevel = PartitionKind = \
HTTPFailure = ConnectionPolicy = RetryOptions = None
__all__ = ("CosmosDBSQLBackend",)
# Status codes compared against ``HTTPFailure.status_code``: a missing
# document on read, and an already-existing database/collection on create.
ERROR_NOT_FOUND = 404
ERROR_EXISTS = 409
LOGGER = get_logger(__name__)
class CosmosDBSQLBackend(KeyValueStoreBackend):
    """CosmosDB/SQL backend for Celery."""

    def __init__(self,
                 url=None,
                 database_name=None,
                 collection_name=None,
                 consistency_level=None,
                 max_retry_attempts=None,
                 max_retry_wait_time=None,
                 *args,
                 **kwargs):
        """Read settings from the arguments, falling back to the app config.

        Raises:
            ImproperlyConfigured: if pydocumentdb is missing, the URL is
                invalid, or the consistency level name is unknown.
        """
        super().__init__(*args, **kwargs)

        if pydocumentdb is None:
            raise ImproperlyConfigured(
                "You need to install the pydocumentdb library to use the "
                "CosmosDB backend.")

        conf = self.app.conf

        endpoint, key = self._parse_url(url)
        self._endpoint = endpoint
        self._key = key

        self._database_name = (
            database_name or conf["cosmosdbsql_database_name"])

        self._collection_name = (
            collection_name or conf["cosmosdbsql_collection_name"])

        try:
            level_name = (
                consistency_level or conf["cosmosdbsql_consistency_level"])
            self._consistency_level = getattr(ConsistencyLevel, level_name)
        except AttributeError:
            raise ImproperlyConfigured("Unknown CosmosDB consistency level")

        self._max_retry_attempts = (
            max_retry_attempts or conf["cosmosdbsql_max_retry_attempts"])

        self._max_retry_wait_time = (
            max_retry_wait_time or conf["cosmosdbsql_max_retry_wait_time"])

    @classmethod
    def _parse_url(cls, url):
        """Return ``(endpoint, key)`` extracted from *url*.

        The port defaults to 443; the scheme is ``https`` for port 443
        and ``http`` otherwise.

        Raises:
            ImproperlyConfigured: if the host or password is missing.
        """
        _, host, port, _, password, _, _ = _parse_url(url)

        if not host or not password:
            raise ImproperlyConfigured("Invalid URL")

        port = port or 443

        if port == 443:
            scheme = "https"
        else:
            scheme = "http"
        return f"{scheme}://{host}:{port}", password

    @cached_property
    def _client(self):
        """Return the CosmosDB/SQL client.

        If this is the first call to the property, the client is created and
        the database and collection are initialized if they don't yet exist.
        """
        policy = ConnectionPolicy()
        policy.RetryOptions = RetryOptions(
            max_retry_attempt_count=self._max_retry_attempts,
            max_wait_time_in_seconds=self._max_retry_wait_time)

        credentials = {"masterKey": self._key}
        client = DocumentClient(
            self._endpoint,
            credentials,
            connection_policy=policy,
            consistency_level=self._consistency_level)

        self._create_database_if_not_exists(client)
        self._create_collection_if_not_exists(client)

        return client

    def _create_database_if_not_exists(self, client):
        """Create the database, tolerating an "already exists" failure."""
        try:
            client.CreateDatabase({"id": self._database_name})
        except HTTPFailure as ex:
            if ex.status_code != ERROR_EXISTS:
                raise
        else:
            LOGGER.info("Created CosmosDB database %s",
                        self._database_name)

    def _create_collection_if_not_exists(self, client):
        """Create the collection, tolerating an "already exists" failure."""
        try:
            client.CreateCollection(
                self._database_link,
                {"id": self._collection_name,
                 "partitionKey": {"paths": ["/id"],
                                  "kind": PartitionKind.Hash}})
        except HTTPFailure as ex:
            if ex.status_code != ERROR_EXISTS:
                raise
        else:
            LOGGER.info("Created CosmosDB collection %s/%s",
                        self._database_name, self._collection_name)

    @cached_property
    def _database_link(self):
        """Link string of the configured database."""
        return "dbs/" + self._database_name

    @cached_property
    def _collection_link(self):
        """Link string of the configured collection."""
        return self._database_link + "/colls/" + self._collection_name

    def _get_document_link(self, key):
        """Return the link string of the document identified by *key*."""
        return self._collection_link + "/docs/" + key

    @classmethod
    def _get_partition_key(cls, key):
        """Return the request options selecting *key* as partition key.

        Raises:
            ValueError: if *key* is empty or only whitespace.
        """
        if not key or key.isspace():
            raise ValueError("Key cannot be none, empty or whitespace.")

        return {"partitionKey": key}

    def get(self, key):
        """Read the value stored at the given key.

        Args:
            key: The key for which to read the value.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Getting CosmosDB document %s/%s/%s",
                     self._database_name, self._collection_name, key)

        try:
            document = self._client.ReadDocument(
                self._get_document_link(key),
                self._get_partition_key(key))
        except HTTPFailure as ex:
            if ex.status_code == ERROR_NOT_FOUND:
                return None
            raise
        return document.get("value")

    def set(self, key, value):
        """Store a value for a given key.

        Args:
            key: The key at which to store the value.
            value: The value to store.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Creating CosmosDB document %s/%s/%s",
                     self._database_name, self._collection_name, key)

        self._client.CreateDocument(
            self._collection_link,
            {"id": key, "value": value},
            self._get_partition_key(key))

    def mget(self, keys):
        """Read all the values for the provided keys.

        Args:
            keys: The list of keys to read.
        """
        values = []
        for key in keys:
            values.append(self.get(key))
        return values

    def delete(self, key):
        """Delete the value at a given key.

        Args:
            key: The key of the value to delete.
        """
        key = bytes_to_str(key)
        LOGGER.debug("Deleting CosmosDB document %s/%s/%s",
                     self._database_name, self._collection_name, key)

        self._client.DeleteDocument(
            self._get_document_link(key),
            self._get_partition_key(key))

View File

@@ -0,0 +1,114 @@
"""Couchbase result store backend."""
from kombu.utils.url import _parse_url
from celery.exceptions import ImproperlyConfigured
from .base import KeyValueStoreBackend
try:
from couchbase.auth import PasswordAuthenticator
from couchbase.cluster import Cluster
except ImportError:
Cluster = PasswordAuthenticator = None
try:
from couchbase_core._libcouchbase import FMT_AUTO
except ImportError:
FMT_AUTO = None
__all__ = ('CouchbaseBackend',)
class CouchbaseBackend(KeyValueStoreBackend):
    """Couchbase backend.

    Raises:
        celery.exceptions.ImproperlyConfigured:
            if module :pypi:`couchbase` is not available.
    """

    bucket = 'default'
    host = 'localhost'
    port = 8091
    username = None
    password = None
    quiet = False
    supports_autoexpire = True

    timeout = 2.5

    # Use str as couchbase key not bytes
    key_t = str

    def __init__(self, url=None, *args, **kwargs):
        """Resolve connection settings from *url*, config, then class defaults.

        Raises:
            ImproperlyConfigured: if the couchbase library is missing or
                ``couchbase_backend_settings`` is not a dict.
        """
        kwargs.setdefault('expires_type', int)
        super().__init__(*args, **kwargs)
        self.url = url

        if Cluster is None:
            raise ImproperlyConfigured(
                'You need to install the couchbase library to use the '
                'Couchbase backend.',
            )

        uhost = uport = uname = upass = ubucket = None
        if url:
            _, uhost, uport, uname, upass, ubucket, _ = _parse_url(url)
            ubucket = ubucket.strip('/') if ubucket else None

        config = self.app.conf.get('couchbase_backend_settings', None)
        if config is None:
            config = {}
        elif not isinstance(config, dict):
            raise ImproperlyConfigured(
                'Couchbase backend settings should be grouped in a dict',
            )

        # URL parts win over the settings dict, which wins over defaults.
        self.host = uhost or config.get('host', self.host)
        self.port = int(uport or config.get('port', self.port))
        self.bucket = ubucket or config.get('bucket', self.bucket)
        self.username = uname or config.get('username', self.username)
        self.password = upass or config.get('password', self.password)

        self._connection = None

    def _get_connection(self):
        """Connect to the Couchbase server."""
        if self._connection is not None:
            return self._connection

        if self.host and self.port:
            uri = f"couchbase://{self.host}:{self.port}"
        else:
            uri = f"couchbase://{self.host}"

        opt = None
        if self.username and self.password:
            opt = PasswordAuthenticator(self.username, self.password)

        cluster = Cluster(uri, opt)
        bucket = cluster.bucket(self.bucket)
        self._connection = bucket.default_collection()
        return self._connection

    @property
    def connection(self):
        """The collection handle, created on first access."""
        return self._get_connection()

    def get(self, key):
        """Return the ``content`` of the document stored at *key*."""
        document = self.connection.get(key)
        return document.content

    def set(self, key, value):
        """Upsert *value* at *key* with the configured expiry as TTL."""
        # Since 4.0.0 value is JSONType in couchbase lib, so parameter format isn't needed
        if FMT_AUTO is None:
            self.connection.upsert(key, value, ttl=self.expires)
        else:
            self.connection.upsert(key, value, ttl=self.expires, format=FMT_AUTO)

    def mget(self, keys):
        """Return the connection's ``get_multi`` result for *keys*."""
        return self.connection.get_multi(keys)

    def delete(self, key):
        """Remove the document stored at *key*."""
        self.connection.remove(key)

View File

@@ -0,0 +1,99 @@
"""CouchDB result store backend."""
from kombu.utils.encoding import bytes_to_str
from kombu.utils.url import _parse_url
from celery.exceptions import ImproperlyConfigured
from .base import KeyValueStoreBackend
try:
import pycouchdb
except ImportError:
pycouchdb = None
__all__ = ('CouchBackend',)
# Raised (as ImproperlyConfigured) when ``pycouchdb`` is not importable.
ERR_LIB_MISSING = """\
You need to install the pycouchdb library to use the CouchDB result backend\
"""
class CouchBackend(KeyValueStoreBackend):
    """CouchDB backend.

    Raises:
        celery.exceptions.ImproperlyConfigured:
            if module :pypi:`pycouchdb` is not available.
    """

    container = 'default'
    scheme = 'http'
    host = 'localhost'
    port = 5984
    username = None
    password = None

    def __init__(self, url=None, *args, **kwargs):
        """Resolve connection settings from *url*, then class defaults.

        The URL's own scheme is not used for the connection; ``scheme``
        stays at its class-level value.

        Raises:
            ImproperlyConfigured: if the pycouchdb library is missing.
        """
        super().__init__(*args, **kwargs)
        self.url = url

        if pycouchdb is None:
            raise ImproperlyConfigured(ERR_LIB_MISSING)

        uhost = uport = uname = upass = ucontainer = None
        if url:
            _, uhost, uport, uname, upass, ucontainer, _ = _parse_url(url)
            ucontainer = ucontainer.strip('/') if ucontainer else None

        self.host = uhost or self.host
        self.port = int(uport or self.port)
        self.container = ucontainer or self.container
        self.username = uname or self.username
        self.password = upass or self.password

        self._connection = None

    def _get_connection(self):
        """Connect to the CouchDB server."""
        if self.username and self.password:
            conn_string = f'{self.scheme}://{self.username}:{self.password}@{self.host}:{self.port}'
            server = pycouchdb.Server(conn_string, authmethod='basic')
        else:
            conn_string = f'{self.scheme}://{self.host}:{self.port}'
            server = pycouchdb.Server(conn_string)

        try:
            return server.database(self.container)
        except pycouchdb.exceptions.NotFound:
            # The database does not exist yet: create it.
            return server.create(self.container)

    @property
    def connection(self):
        """The database handle, created on first access."""
        if self._connection is None:
            self._connection = self._get_connection()
        return self._connection

    def get(self, key):
        """Return the value stored at *key*, or ``None`` if not found."""
        key = bytes_to_str(key)
        try:
            return self.connection.get(key)['value']
        except pycouchdb.exceptions.NotFound:
            return None

    def set(self, key, value):
        """Store *value* at *key*, updating the document if it exists."""
        key = bytes_to_str(key)
        data = {'_id': key, 'value': value}
        try:
            self.connection.save(data)
        except pycouchdb.exceptions.Conflict:
            # document already exists, update it
            data = self.connection.get(key)
            data['value'] = value
            self.connection.save(data)

    def mget(self, keys):
        """Return the values for *keys*, in order."""
        return [self.get(key) for key in keys]

    def delete(self, key):
        """Delete the document stored at *key*."""
        # Normalise the key the same way get() and set() do, so a bytes
        # key addresses the document that was stored under its str form.
        key = bytes_to_str(key)
        self.connection.delete(key)

View File

@@ -0,0 +1,222 @@
"""SQLAlchemy result store backend."""
import logging
from contextlib import contextmanager
from vine.utils import wraps
from celery import states
from celery.backends.base import BaseBackend
from celery.exceptions import ImproperlyConfigured
from celery.utils.time import maybe_timedelta
from .models import Task, TaskExtended, TaskSet
from .session import SessionManager
try:
    from sqlalchemy.exc import DatabaseError, InvalidRequestError
    from sqlalchemy.orm.exc import StaleDataError
except ImportError:
    # Fail at import time with a configuration error; the trailing space
    # keeps the two sentences apart once the literals are concatenated.
    raise ImproperlyConfigured(
        'The database result backend requires SQLAlchemy to be installed. '
        'See https://pypi.org/project/SQLAlchemy/')
logger = logging.getLogger(__name__)
__all__ = ('DatabaseBackend',)
@contextmanager
def session_cleanup(session):
    """Context manager that rolls back on error and always closes *session*.

    An exception raised in the managed body triggers ``session.rollback()``
    and is then re-raised; ``session.close()`` runs in every case.
    """
    try:
        try:
            yield
        except Exception:
            session.rollback()
            raise
    finally:
        session.close()
def retry(fun):
    """Decorate *fun* so selected database errors trigger another attempt.

    The wrapper accepts an extra ``max_retries`` keyword (default 3) that
    is removed before *fun* is called.  The last failed attempt re-raises
    the error; every failure is logged as a warning with its traceback.
    """
    @wraps(fun)
    def _inner(*args, **kwargs):
        max_retries = kwargs.pop('max_retries', 3)

        for attempt in range(1, max_retries + 1):
            remaining = max_retries - attempt
            try:
                return fun(*args, **kwargs)
            except (DatabaseError, InvalidRequestError, StaleDataError):
                logger.warning(
                    'Failed operation %s.  Retrying %s more times.',
                    fun.__name__, remaining,
                    exc_info=True)
                if remaining <= 0:
                    raise

    return _inner
class DatabaseBackend(BaseBackend):
"""The database result backend."""
# ResultSet.iterate should sleep this much between each pool,
# to not bombard the database with queries.
subpolling_interval = 0.5
task_cls = Task
taskset_cls = TaskSet
def __init__(self, dburi=None, engine_options=None, url=None, **kwargs):
    """Configure the backend from its arguments and the app configuration.

    Arguments:
        dburi (str): connection string, used when *url* is not given.
        engine_options (dict): base engine options; entries of the
            ``database_engine_options`` setting override them.
        url (str): connection string, takes precedence over *dburi*.
            It was added later and is what the app passes when the
            backend is selected by URL (celery.app.backends.by_url).
        **kwargs: forwarded to the parent constructor;
            ``short_lived_sessions`` is also looked up here.

    Raises:
        ImproperlyConfigured: if neither *url*, *dburi* nor the
            ``database_url`` setting provides a connection string.
    """
    super().__init__(expires_type=maybe_timedelta, url=url, **kwargs)
    conf = self.app.conf

    if self.extended_result:
        self.task_cls = TaskExtended

    self.url = url or dburi or conf.database_url

    base_options = engine_options or {}
    configured_options = conf.database_engine_options or {}
    self.engine_options = dict(base_options, **configured_options)

    self.short_lived_sessions = kwargs.get(
        'short_lived_sessions', conf.database_short_lived_sessions)

    schemas = conf.database_table_schemas or {}
    tablenames = conf.database_table_names or {}
    # Apply the configured schema/table name to both result models.
    for model, key in ((self.task_cls, 'task'), (self.taskset_cls, 'group')):
        model.configure(schema=schemas.get(key), name=tablenames.get(key))

    if not self.url:
        raise ImproperlyConfigured(
            'Missing connection string! Do you have the'
            ' database_url setting set to a real value?')
@property
def extended_result(self):
    """Value the app configuration finds for key 'extended' in 'result'."""
    conf = self.app.conf
    return conf.find_value_for_key('extended', 'result')
def ResultSession(self, session_manager=SessionManager()):
    """Return a session for ``self.url`` created by *session_manager*.

    The default *session_manager* is one ``SessionManager`` instance
    built when this function is defined, so every call that does not
    pass its own manager uses that same instance.
    """
    make_session = session_manager.session_factory
    return make_session(
        dburi=self.url,
        short_lived_sessions=self.short_lived_sessions,
        **self.engine_options)
@retry
def _store_result(self, task_id, result, state, traceback=None,
                  request=None, **kwargs):
    """Store return value and state of an executed task.

    The row for *task_id* is looked up and, when missing, created and
    flushed before the result columns are filled in and committed.
    """
    model = self.task_cls
    session = self.ResultSession()
    with session_cleanup(session):
        matches = list(session.query(model).filter(model.task_id == task_id))
        task = matches[0] if matches else matches
        if not task:
            task = model(task_id)
            task.task_id = task_id
            session.add(task)
            session.flush()

        self._update_result(
            task, result, state, traceback=traceback, request=request)
        session.commit()
def _update_result(self, task, result, state, traceback=None,
request=None):
meta = self._get_result_meta(result=result, state=state,
traceback=traceback, request=request,
format_date=False, encode=True)
# Exclude the primary key id and task_id columns
# as we should not set it None
columns = [column.name for column in self.task_cls.__table__.columns
if column.name not in {'id', 'task_id'}]
# Iterate through the columns name of the table
# to set the value from meta.
# If the value is not present in meta, set None
for column in columns:
value = meta.get(column)
setattr(task, column, value)
@retry
def _get_task_meta_for(self, task_id):
    """Get task meta-data for a task by id.

    When no row exists for *task_id* an unsaved task object in the
    PENDING state with a ``None`` result is used instead.
    """
    model = self.task_cls
    session = self.ResultSession()
    with session_cleanup(session):
        matches = list(session.query(model).filter(model.task_id == task_id))
        task = matches[0] if matches else matches
        if not task:
            task = model(task_id)
            task.status = states.PENDING
            task.result = None

        data = task.to_dict()
        # args/kwargs are only decoded when they are present and set.
        for field in ('args', 'kwargs'):
            if data.get(field, None) is not None:
                data[field] = self.decode(data[field])
        return self.meta_from_decoded(data)
@retry
def _save_group(self, group_id, result):
    """Store the result of an executed group and return *result*."""
    session = self.ResultSession()
    with session_cleanup(session):
        session.add(self.taskset_cls(group_id, result))
        session.flush()
        session.commit()
        return result
@retry
def _restore_group(self, group_id):
    """Get meta-data for group by id.

    Returns the dict form of the first matching row, or ``None`` when
    there is no such row.
    """
    model = self.taskset_cls
    session = self.ResultSession()
    with session_cleanup(session):
        query = session.query(model).filter(model.taskset_id == group_id)
        group = query.first()
        if not group:
            return None
        return group.to_dict()
@retry
def _delete_group(self, group_id):
    """Delete meta-data for group by id."""
    model = self.taskset_cls
    session = self.ResultSession()
    with session_cleanup(session):
        matching = session.query(model).filter(model.taskset_id == group_id)
        matching.delete()
        session.flush()
        session.commit()
@retry
def _forget(self, task_id):
    """Forget about result: delete the rows stored for *task_id*."""
    model = self.task_cls
    session = self.ResultSession()
    with session_cleanup(session):
        matching = session.query(model).filter(model.task_id == task_id)
        matching.delete()
        session.commit()
def cleanup(self):
    """Delete expired meta-data.

    Task and group rows whose ``date_done`` is older than
    ``now - self.expires`` are deleted in a single commit.
    """
    session = self.ResultSession()
    expires = self.expires
    now = self.app.now()
    with session_cleanup(session):
        for model in (self.task_cls, self.taskset_cls):
            session.query(model).filter(
                model.date_done < (now - expires)).delete()
        session.commit()
def __reduce__(self, args=(), kwargs=None):
    """Add this backend's settings to the parent's reduce arguments.

    ``dburi``, ``expires`` and ``engine_options`` are added to a copy
    of *kwargs*, so a dict supplied by the caller is left untouched.
    """
    merged = {
        **(kwargs or {}),
        'dburi': self.url,
        'expires': self.expires,
        'engine_options': self.engine_options,
    }
    return super().__reduce__(args, merged)

View File

@@ -0,0 +1,108 @@
"""Database models used by the SQLAlchemy result store backend."""
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.types import PickleType
from celery import states
from .session import ResultModelBase
__all__ = ('Task', 'TaskExtended', 'TaskSet')
class Task(ResultModelBase):
    """Task result/status."""

    __tablename__ = 'celery_taskmeta'
    __table_args__ = {'sqlite_autoincrement': True}

    # Integer primary key, tied to the ``task_id_sequence`` sequence.
    id = sa.Column(sa.Integer, sa.Sequence('task_id_sequence'),
                   primary_key=True, autoincrement=True)
    # Task identifier; unique per row.
    task_id = sa.Column(sa.String(155), unique=True)
    status = sa.Column(sa.String(50), default=states.PENDING)
    result = sa.Column(PickleType, nullable=True)
    # Set on insert and refreshed on every update.
    date_done = sa.Column(sa.DateTime, default=datetime.utcnow,
                          onupdate=datetime.utcnow, nullable=True)
    traceback = sa.Column(sa.Text, nullable=True)

    def __init__(self, task_id):
        """Create a row object for *task_id*."""
        self.task_id = task_id

    def to_dict(self):
        """Return the task's id, status, result, traceback and date."""
        fields = ('task_id', 'status', 'result', 'traceback', 'date_done')
        return {field: getattr(self, field) for field in fields}

    def __repr__(self):
        return f'<Task {self.task_id} state: {self.status}>'

    @classmethod
    def configure(cls, schema=None, name=None):
        """Set the schema and name of the mapped table.

        The schema is applied to both the table and the default of the
        ``id`` column; *name* falls back to ``__tablename__``.
        """
        table = cls.__table__
        table.schema = schema
        cls.id.default.schema = schema
        table.name = name or cls.__tablename__
class TaskExtended(Task):
    """Task row with the additional columns of the extended result."""

    __tablename__ = 'celery_taskmeta'
    __table_args__ = {'sqlite_autoincrement': True, 'extend_existing': True}

    name = sa.Column(sa.String(155), nullable=True)
    args = sa.Column(sa.LargeBinary, nullable=True)
    kwargs = sa.Column(sa.LargeBinary, nullable=True)
    worker = sa.Column(sa.String(155), nullable=True)
    retries = sa.Column(sa.Integer, nullable=True)
    queue = sa.Column(sa.String(155), nullable=True)

    def to_dict(self):
        """Return the parent's dict plus the extended columns."""
        task_dict = super().to_dict()
        for field in ('name', 'args', 'kwargs', 'worker', 'retries', 'queue'):
            task_dict[field] = getattr(self, field)
        return task_dict
class TaskSet(ResultModelBase):
    """TaskSet result."""

    __tablename__ = 'celery_tasksetmeta'
    __table_args__ = {'sqlite_autoincrement': True}

    # Integer primary key, tied to the ``taskset_id_sequence`` sequence.
    id = sa.Column(sa.Integer, sa.Sequence('taskset_id_sequence'),
                   autoincrement=True, primary_key=True)
    # Group identifier; unique per row.
    taskset_id = sa.Column(sa.String(155), unique=True)
    result = sa.Column(PickleType, nullable=True)
    date_done = sa.Column(sa.DateTime, default=datetime.utcnow,
                          nullable=True)

    def __init__(self, taskset_id, result):
        """Create a row object holding *result* for *taskset_id*."""
        self.taskset_id = taskset_id
        self.result = result

    def to_dict(self):
        """Return the group's id, result and completion date."""
        fields = ('taskset_id', 'result', 'date_done')
        return {field: getattr(self, field) for field in fields}

    def __repr__(self):
        return '<TaskSet: {0.taskset_id}>'.format(self)

    @classmethod
    def configure(cls, schema=None, name=None):
        """Set the schema and name of the mapped table.

        The schema is applied to both the table and the default of the
        ``id`` column; *name* falls back to ``__tablename__``.
        """
        table = cls.__table__
        table.schema = schema
        cls.id.default.schema = schema
        table.name = name or cls.__tablename__

View File

@@ -0,0 +1,89 @@
"""SQLAlchemy session."""
import time
from kombu.utils.compat import register_after_fork
from sqlalchemy import create_engine
from sqlalchemy.exc import DatabaseError
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
from celery.utils.time import get_exponential_backoff_interval
try:
from sqlalchemy.orm import declarative_base
except ImportError:
# TODO: Remove this once we drop support for SQLAlchemy < 1.4.
from sqlalchemy.ext.declarative import declarative_base
ResultModelBase = declarative_base()
__all__ = ('SessionManager',)
PREPARE_MODELS_MAX_RETRIES = 10
def _after_fork_cleanup_session(session):
session._after_fork()
class SessionManager:
    """Manage SQLAlchemy sessions."""

    def __init__(self):
        """Start with empty caches and register the after-fork hook."""
        self.prepared = False
        self.forked = False
        self._sessions = {}
        self._engines = {}
        if register_after_fork is not None:
            register_after_fork(self, _after_fork_cleanup_session)

    def _after_fork(self):
        """Record that the after-fork hook has run."""
        self.forked = True

    def get_engine(self, dburi, **kwargs):
        """Return an engine for *dburi*.

        Once ``forked`` is set, engines are cached per *dburi*.  Before
        that, a new ``NullPool`` engine is created on every call and
        keyword arguments starting with ``pool`` are dropped.
        """
        if not self.forked:
            options = {name: value for name, value in kwargs.items()
                       if not name.startswith('pool')}
            return create_engine(dburi, poolclass=NullPool, **options)

        if dburi not in self._engines:
            self._engines[dburi] = create_engine(dburi, **kwargs)
        return self._engines[dburi]

    def create_session(self, dburi, short_lived_sessions=False, **kwargs):
        """Return an ``(engine, session factory)`` pair for *dburi*.

        Once ``forked`` is set the factory is cached per *dburi*, and
        rebuilt on every call when *short_lived_sessions* is true.
        """
        engine = self.get_engine(dburi, **kwargs)
        if not self.forked:
            return engine, sessionmaker(bind=engine)

        if short_lived_sessions or dburi not in self._sessions:
            self._sessions[dburi] = sessionmaker(bind=engine)
        return engine, self._sessions[dburi]

    def prepare_models(self, engine):
        """Create the result tables once per manager, retrying on errors."""
        if self.prepared:
            return

        # SQLAlchemy will check if the items exist before trying to
        # create them, which is a race condition. If it raises an error
        # in one iteration, the next may pass all the existence checks
        # and the call will succeed.
        attempts = 0
        while True:
            try:
                ResultModelBase.metadata.create_all(engine)
                break
            except DatabaseError:
                if attempts >= PREPARE_MODELS_MAX_RETRIES:
                    raise
                delay_ms = get_exponential_backoff_interval(
                    10, attempts, 1000, True
                )
                time.sleep(delay_ms / 1000)
                attempts += 1
        self.prepared = True

    def session_factory(self, dburi, **kwargs):
        """Return a new session for *dburi*, preparing the models first."""
        engine, make_session = self.create_session(dburi, **kwargs)
        self.prepare_models(engine)
        return make_session()

View File

@@ -0,0 +1,493 @@
"""AWS DynamoDB result store backend."""
from collections import namedtuple
from time import sleep, time
from kombu.utils.url import _parse_url as parse_url
from celery.exceptions import ImproperlyConfigured
from celery.utils.log import get_logger
from .base import KeyValueStoreBackend
try:
import boto3
from botocore.exceptions import ClientError
except ImportError:
boto3 = ClientError = None
__all__ = ('DynamoDBBackend',)
# Helper class that describes a DynamoDB attribute
DynamoDBAttribute = namedtuple('DynamoDBAttribute', ('name', 'data_type'))
logger = get_logger(__name__)
class DynamoDBBackend(KeyValueStoreBackend):
"""AWS DynamoDB result backend.
Raises:
celery.exceptions.ImproperlyConfigured:
if module :pypi:`boto3` is not available.
"""
#: default DynamoDB table name (`default`)
table_name = 'celery'
#: Read Provisioned Throughput (`default`)
read_capacity_units = 1
#: Write Provisioned Throughput (`default`)
write_capacity_units = 1
#: AWS region (`default`)
aws_region = None
#: The endpoint URL that is passed to boto3 (local DynamoDB) (`default`)
endpoint_url = None
#: Item time-to-live in seconds (`default`)
time_to_live_seconds = None
# DynamoDB supports Time to Live as an auto-expiry mechanism.
supports_autoexpire = True
_key_field = DynamoDBAttribute(name='id', data_type='S')
_value_field = DynamoDBAttribute(name='result', data_type='B')
_timestamp_field = DynamoDBAttribute(name='timestamp', data_type='N')
_ttl_field = DynamoDBAttribute(name='ttl', data_type='N')
_available_fields = None
def __init__(self, url=None, table_name=None, *args, **kwargs):
    """Configure the backend, optionally from a backend URL.

    Arguments:
        url (str): backend URL.  Its parts supply the AWS credentials,
            the region (``localhost`` selects a local endpoint on the
            URL's port), the table name and the ``read``, ``write``
            and ``ttl_seconds`` query options.
        table_name (str): table name used unless the URL names one.

    Raises:
        ImproperlyConfigured: if boto3 is not installed, or only one
            of access key id and secret is given.
        ValueError: if ``ttl_seconds`` is not a number.
    """
    super().__init__(*args, **kwargs)

    self.url = url
    self.table_name = table_name or self.table_name

    if not boto3:
        raise ImproperlyConfigured(
            'You need to install the boto3 library to use the '
            'DynamoDB backend.')

    access_key_id = secret_access_key = None
    credentials_given = False

    if url is not None:
        (_scheme, region, port, access_key_id, secret_access_key,
         table, query) = parse_url(url)

        has_access_key = access_key_id is not None
        has_secret_key = secret_access_key is not None
        if has_access_key != has_secret_key:
            raise ImproperlyConfigured(
                'You need to specify both the Access Key ID '
                'and Secret.')
        credentials_given = has_access_key

        if region != 'localhost':
            self.aws_region = region
        else:
            # We are using the downloadable, local version of DynamoDB
            self.endpoint_url = f'http://localhost:{port}'
            self.aws_region = 'us-east-1'
            logger.warning(
                f'Using local-only DynamoDB endpoint URL: {self.endpoint_url}'
            )

        # If endpoint_url is explicitly set use it instead
        configured_endpoint = self.app.conf.get('dynamodb_endpoint_url')
        if configured_endpoint:
            self.endpoint_url = configured_endpoint

        self.read_capacity_units = int(
            query.get('read', self.read_capacity_units))
        self.write_capacity_units = int(
            query.get('write', self.write_capacity_units))

        ttl = query.get('ttl_seconds', self.time_to_live_seconds)
        if ttl:
            try:
                self.time_to_live_seconds = int(ttl)
            except ValueError as e:
                logger.error(
                    f'TTL must be a number; got "{ttl}"',
                    exc_info=e
                )
                raise

        self.table_name = table or self.table_name

    self._available_fields = (
        self._key_field,
        self._value_field,
        self._timestamp_field,
    )

    self._client = None
    # With explicit credentials the client is created right away.
    if credentials_given:
        self._get_client(
            access_key_id=access_key_id,
            secret_access_key=secret_access_key
        )
def _get_client(self, access_key_id=None, secret_access_key=None):
"""Get client connection."""
if self._client is None:
client_parameters = {
'region_name': self.aws_region
}
if access_key_id is not None:
client_parameters.update({
'aws_access_key_id': access_key_id,
'aws_secret_access_key': secret_access_key
})
if self.endpoint_url is not None:
client_parameters['endpoint_url'] = self.endpoint_url
self._client = boto3.client(
'dynamodb',
**client_parameters
)
self._get_or_create_table()
if self._has_ttl() is not None:
self._validate_ttl_methods()
self._set_table_ttl()
return self._client
def _get_table_schema(self):
"""Get the boto3 structure describing the DynamoDB table schema."""
return {
'AttributeDefinitions': [
{
'AttributeName': self._key_field.name,
'AttributeType': self._key_field.data_type
}
],
'TableName': self.table_name,
'KeySchema': [
{
'AttributeName': self._key_field.name,
'KeyType': 'HASH'
}
],
'ProvisionedThroughput': {
'ReadCapacityUnits': self.read_capacity_units,
'WriteCapacityUnits': self.write_capacity_units
}
}
def _get_or_create_table(self):
"""Create table if not exists, otherwise return the description."""
table_schema = self._get_table_schema()
try:
return self._client.describe_table(TableName=self.table_name)
except ClientError as e:
error_code = e.response['Error'].get('Code', 'Unknown')
if error_code == 'ResourceNotFoundException':
table_description = self._client.create_table(**table_schema)
logger.info(
'DynamoDB Table {} did not exist, creating.'.format(
self.table_name
)
)
# In case we created the table, wait until it becomes available.
self._wait_for_table_status('ACTIVE')
logger.info(
'DynamoDB Table {} is now available.'.format(
self.table_name
)
)
return table_description
else:
raise e
def _has_ttl(self):
"""Return the desired Time to Live config.
- True: Enable TTL on the table; use expiry.
- False: Disable TTL on the table; don't use expiry.
- None: Ignore TTL on the table; don't use expiry.
"""
return None if self.time_to_live_seconds is None \
else self.time_to_live_seconds >= 0
def _validate_ttl_methods(self):
"""Verify boto support for the DynamoDB Time to Live methods."""
# Required TTL methods.
required_methods = (
'update_time_to_live',
'describe_time_to_live',
)
# Find missing methods.
missing_methods = []
for method in list(required_methods):
if not hasattr(self._client, method):
missing_methods.append(method)
if missing_methods:
logger.error(
(
'boto3 method(s) {methods} not found; ensure that '
'boto3>=1.9.178 and botocore>=1.12.178 are installed'
).format(
methods=','.join(missing_methods)
)
)
raise AttributeError(
'boto3 method(s) {methods} not found'.format(
methods=','.join(missing_methods)
)
)
def _get_ttl_specification(self, ttl_attr_name):
"""Get the boto3 structure describing the DynamoDB TTL specification."""
return {
'TableName': self.table_name,
'TimeToLiveSpecification': {
'Enabled': self._has_ttl(),
'AttributeName': ttl_attr_name
}
}
def _get_table_ttl_description(self):
# Get the current TTL description.
try:
description = self._client.describe_time_to_live(
TableName=self.table_name
)
except ClientError as e:
error_code = e.response['Error'].get('Code', 'Unknown')
error_message = e.response['Error'].get('Message', 'Unknown')
logger.error((
'Error describing Time to Live on DynamoDB table {table}: '
'{code}: {message}'
).format(
table=self.table_name,
code=error_code,
message=error_message,
))
raise e
return description
def _set_table_ttl(self):
    """Enable or disable Time to Live on the table.

    Returns the current TTL description unchanged when the table is
    already in, or moving to, the desired state.  Otherwise
    ``update_time_to_live`` is called and its response is returned.

    Raises:
        ClientError: re-raised after logging when describing or
            updating the Time to Live settings fails.
    """
    # Get the table TTL description, and return early when possible.
    description = self._get_table_ttl_description()
    ttl_description = description['TimeToLiveDescription']
    status = ttl_description['TimeToLiveStatus']
    want_ttl = self._has_ttl()

    # Only known while TTL is enabled or being enabled.
    cur_attr_name = None
    if status in ('ENABLED', 'ENABLING'):
        cur_attr_name = ttl_description['AttributeName']
        if want_ttl and cur_attr_name == self._ttl_field.name:
            # We want TTL enabled, and it is currently enabled or being
            # enabled, and on the correct attribute.
            logger.debug((
                'DynamoDB Time to Live is {situation} '
                'on table {table}'
            ).format(
                situation='already enabled'
                if status == 'ENABLED'
                else 'currently being enabled',
                table=self.table_name
            ))
            return description

    elif status in ('DISABLED', 'DISABLING'):
        if not want_ttl:
            # We want TTL disabled, and it is currently disabled or being
            # disabled.
            logger.debug((
                'DynamoDB Time to Live is {situation} '
                'on table {table}'
            ).format(
                situation='already disabled'
                if status == 'DISABLED'
                else 'currently being disabled',
                table=self.table_name
            ))
            return description

    # The state shouldn't ever have any value beyond the four handled
    # above, but to ease troubleshooting of potential future changes, emit
    # a log showing the unknown state.
    else:  # pragma: no cover
        logger.warning((
            'Unknown DynamoDB Time to Live status {status} '
            'on table {table}. Attempting to continue.'
        ).format(
            status=status,
            table=self.table_name
        ))

    # At this point, we have one of the following situations:
    #
    # We want TTL enabled,
    #
    # - and it's currently disabled: Try to enable.
    #
    # - and it's being disabled: Try to enable, but this is almost sure to
    #   raise ValidationException with message:
    #
    #     Time to live has been modified multiple times within a fixed
    #     interval
    #
    # - and it's currently enabling or being enabled, but on the wrong
    #   attribute: Try to enable, but this will raise ValidationException
    #   with message:
    #
    #     TimeToLive is active on a different AttributeName: current
    #     AttributeName is ttlx
    #
    # We want TTL disabled,
    #
    # - and it's currently enabled: Try to disable.
    #
    # - and it's being enabled: Try to disable, but this is almost sure to
    #   raise ValidationException with message:
    #
    #     Time to live has been modified multiple times within a fixed
    #     interval
    #
    attr_name = \
        cur_attr_name if status == 'ENABLED' else self._ttl_field.name
    try:
        specification = self._client.update_time_to_live(
            **self._get_ttl_specification(
                ttl_attr_name=attr_name
            )
        )
    except ClientError as e:
        error_code = e.response['Error'].get('Code', 'Unknown')
        error_message = e.response['Error'].get('Message', 'Unknown')
        logger.error((
            'Error {action} Time to Live on DynamoDB table {table}: '
            '{code}: {message}'
        ).format(
            action='enabling' if want_ttl else 'disabling',
            table=self.table_name,
            code=error_code,
            message=error_message,
        ))
        raise

    # Report the attribute that was actually sent in the request; it
    # differs from the backend's own TTL field when TTL is enabled on
    # another attribute.
    logger.info(
        (
            'DynamoDB table Time to Live updated: '
            'table={table} enabled={enabled} attribute={attr}'
        ).format(
            table=self.table_name,
            enabled=want_ttl,
            attr=attr_name
        )
    )
    return specification
def _wait_for_table_status(self, expected='ACTIVE'):
    """Poll for the expected table status.

    The table is described once per second until its ``TableStatus``
    equals *expected*.  The method returns as soon as the status
    matches, without sleeping after the final check.
    """
    while True:
        table_description = self.client.describe_table(
            TableName=self.table_name
        )
        logger.debug(
            'Waiting for DynamoDB table {} to become {}.'.format(
                self.table_name,
                expected
            )
        )
        if table_description['Table']['TableStatus'] == expected:
            return
        # Not there yet: pause before asking again.
        sleep(1)
def _prepare_get_request(self, key):
"""Construct the item retrieval request parameters."""
return {
'TableName': self.table_name,
'Key': {
self._key_field.name: {
self._key_field.data_type: key
}
}
}
def _prepare_put_request(self, key, value):
"""Construct the item creation request parameters."""
timestamp = time()
put_request = {
'TableName': self.table_name,
'Item': {
self._key_field.name: {
self._key_field.data_type: key
},
self._value_field.name: {
self._value_field.data_type: value
},
self._timestamp_field.name: {
self._timestamp_field.data_type: str(timestamp)
}
}
}
if self._has_ttl():
put_request['Item'].update({
self._ttl_field.name: {
self._ttl_field.data_type:
str(int(timestamp + self.time_to_live_seconds))
}
})
return put_request
def _item_to_dict(self, raw_response):
"""Convert get_item() response to field-value pairs."""
if 'Item' not in raw_response:
return {}
return {
field.name: raw_response['Item'][field.name][field.data_type]
for field in self._available_fields
}
@property
def client(self):
    """The client returned by ``_get_client()`` called without arguments."""
    boto_client = self._get_client()
    return boto_client
def get(self, key):
    """Return the stored value for *key*, or ``None`` if there is none.

    The key is converted with ``str()`` before the item is requested.
    """
    request_parameters = self._prepare_get_request(str(key))
    raw_item = self.client.get_item(**request_parameters)
    fields = self._item_to_dict(raw_item)
    return fields.get(self._value_field.name)
def set(self, key, value):
    """Store *value* under *key* (converted with ``str()``)."""
    put_request = self._prepare_put_request(str(key), value)
    self.client.put_item(**put_request)
def mget(self, keys):
    """Return a list holding ``self.get(key)`` for every key in *keys*."""
    values = []
    for key in keys:
        values.append(self.get(key))
    return values
def delete(self, key):
    """Delete the item stored under *key* (converted with ``str()``).

    The request is built with ``_prepare_get_request``, the same
    helper used for lookups.
    """
    delete_request = self._prepare_get_request(str(key))
    self.client.delete_item(**delete_request)

View File

@@ -0,0 +1,248 @@
"""Elasticsearch result store backend."""
from datetime import datetime
from kombu.utils.encoding import bytes_to_str
from kombu.utils.url import _parse_url
from celery import states
from celery.exceptions import ImproperlyConfigured
from .base import KeyValueStoreBackend
try:
import elasticsearch
except ImportError:
elasticsearch = None
__all__ = ('ElasticsearchBackend',)
E_LIB_MISSING = """\
You need to install the elasticsearch library to use the Elasticsearch \
result backend.\
"""
class ElasticsearchBackend(KeyValueStoreBackend):
"""Elasticsearch Backend.
Raises:
celery.exceptions.ImproperlyConfigured:
if module :pypi:`elasticsearch` is not available.
"""
index = 'celery'
doc_type = 'backend'
scheme = 'http'
host = 'localhost'
port = 9200
username = None
password = None
es_retry_on_timeout = False
es_timeout = 10
es_max_retries = 3
def __init__(self, url=None, *args, **kwargs):
    """Configure the backend from *url* and the app configuration.

    Parts found in *url* override the class-level defaults.  The first
    path segment is the index and the remainder the document type; the
    ``elasticsearch`` scheme is treated as "no scheme given".

    Raises:
        ImproperlyConfigured: if the elasticsearch library is missing.
    """
    super().__init__(*args, **kwargs)
    self.url = url
    _get = self.app.conf.get

    if elasticsearch is None:
        raise ImproperlyConfigured(E_LIB_MISSING)

    index = doc_type = scheme = host = port = username = password = None

    if url:
        scheme, host, port, username, password, path, _ = _parse_url(url)
        if scheme == 'elasticsearch':
            scheme = None
        if path:
            index, _, doc_type = path.strip('/').partition('/')

    # Any part not supplied by the URL keeps its current value.
    parsed_parts = (
        ('index', index),
        ('doc_type', doc_type),
        ('scheme', scheme),
        ('host', host),
        ('port', port),
        ('username', username),
        ('password', password),
    )
    for attr, parsed in parsed_parts:
        setattr(self, attr, parsed or getattr(self, attr))

    self.es_retry_on_timeout = (
        _get('elasticsearch_retry_on_timeout') or self.es_retry_on_timeout
    )

    # These two settings only override the default when explicitly set.
    optional_settings = (
        ('es_timeout', 'elasticsearch_timeout'),
        ('es_max_retries', 'elasticsearch_max_retries'),
    )
    for attr, setting in optional_settings:
        configured = _get(setting)
        if configured is not None:
            setattr(self, attr, configured)

    self.es_save_meta_as_text = _get('elasticsearch_save_meta_as_text', True)
    self._server = None
def exception_safe_to_retry(self, exc):
    """Return True if *exc* is a transport error with a retryable status.

    Only ``elasticsearch.exceptions.TransportError`` instances whose
    ``status_code`` is one of the values below qualify.
    """
    if not isinstance(exc, elasticsearch.exceptions.TransportError):
        return False
    # 401: Unauthorized
    # 409: Conflict
    # 429: Too Many Requests
    # 500: Internal Server Error
    # 502: Bad Gateway
    # 503: Service Unavailable
    # 504: Gateway Timeout
    # N/A: Low level exception (i.e. socket exception)
    retryable = {401, 409, 429, 500, 502, 503, 504, 'N/A'}
    return exc.status_code in retryable
def get(self, key):
    """Return the stored result for *key*, or ``None``.

    ``None`` is returned when the document is not found, is not marked
    as found, or the response lacks the expected structure.
    """
    try:
        response = self._get(key)
    except elasticsearch.exceptions.NotFoundError:
        return None

    try:
        if response['found']:
            return response['_source']['result']
    except (TypeError, KeyError):
        pass
    return None
def _get(self, key):
return self.server.get(
index=self.index,
doc_type=self.doc_type,
id=key,
)
def _set_with_state(self, key, value, state):
body = {
'result': value,
'@timestamp': '{}Z'.format(
datetime.utcnow().isoformat()[:-3]
),
}
try:
self._index(
id=key,
body=body,
)
except elasticsearch.exceptions.ConflictError:
# document already exists, update it
self._update(key, body, state)
def set(self, key, value):
    """Store *value* under *key* with no state information."""
    outcome = self._set_with_state(key, value, None)
    return outcome
def _index(self, id, body, **kwargs):
    """Create document *id* with *body* using ``op_type=create``.

    The id and the keys of *body* are passed through ``bytes_to_str``;
    extra keyword arguments go to the server's ``index`` call.
    """
    document = {}
    for field, value in body.items():
        document[bytes_to_str(field)] = value
    return self.server.index(
        id=bytes_to_str(id),
        index=self.index,
        doc_type=self.doc_type,
        body=document,
        params={'op_type': 'create'},
        **kwargs
    )
def _update(self, id, body, state, **kwargs):
    """Update state in a conflict free manner.

    If state is defined (not None), this will not update ES server if either:
    * existing state is success
    * existing state is a ready state and current state in not a ready state

    This way, a Retry state cannot override a Success or Failure, and chord_unlock
    will not retry indefinitely.

    Returns the server's update response, the response of ``_index``
    when the document is gone, or ``{'result': 'noop'}`` when the
    stored state must not be overwritten.

    Raises:
        elasticsearch.exceptions.ConflictError: when the server
            reports that the update changed nothing.
    """
    # Body keys are normalised with bytes_to_str before use.
    body = {bytes_to_str(k): v for k, v in body.items()}

    try:
        res_get = self._get(key=id)
        if not res_get.get('found'):
            return self._index(id, body, **kwargs)
        # document disappeared between index and get calls.
    except elasticsearch.exceptions.NotFoundError:
        return self._index(id, body, **kwargs)

    try:
        meta_present_on_backend = self.decode_result(res_get['_source']['result'])
    except (TypeError, KeyError):
        # The stored document has no readable result; fall through and
        # attempt the update below.
        pass
    else:
        if meta_present_on_backend['status'] == states.SUCCESS:
            # if stored state is already in success, do nothing
            return {'result': 'noop'}
        elif meta_present_on_backend['status'] in states.READY_STATES and state in states.UNREADY_STATES:
            # if stored state is in ready state and current not, do nothing
            return {'result': 'noop'}

    # get current sequence number and primary term
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
    # Both values fall back to 1 when the get response lacks them.
    seq_no = res_get.get('_seq_no', 1)
    prim_term = res_get.get('_primary_term', 1)

    # try to update document with current seq_no and primary_term
    res = self.server.update(
        id=bytes_to_str(id),
        index=self.index,
        doc_type=self.doc_type,
        body={'doc': body},
        params={'if_primary_term': prim_term, 'if_seq_no': seq_no},
        **kwargs
    )
    # result is elastic search update query result
    # noop = query did not update any document
    # updated = at least one document got updated
    if res['result'] == 'noop':
        raise elasticsearch.exceptions.ConflictError(409, 'conflicting update occurred concurrently', {})
    return res
def encode(self, data):
    """Encode *data* for storage.

    With ``es_save_meta_as_text`` set, or when *data* is not a dict,
    the parent's ``encode`` is used.  Otherwise the truthy ``result``
    and ``traceback`` entries of *data* are replaced in place by the
    third element of ``self._encode(...)`` and *data* is returned.
    """
    if self.es_save_meta_as_text or not isinstance(data, dict):
        return super().encode(data)

    for field in ("result", "traceback"):
        if data.get(field):
            data[field] = self._encode(data[field])[2]
    return data
def decode(self, payload):
    """Decode a stored *payload*.

    With ``es_save_meta_as_text`` set, or when *payload* is not a
    dict, the parent's ``decode`` is used.  Otherwise the truthy
    ``result`` and ``traceback`` entries of *payload* are decoded in
    place with the parent's ``decode`` and *payload* is returned.
    """
    if self.es_save_meta_as_text or not isinstance(payload, dict):
        return super().decode(payload)

    for field in ("result", "traceback"):
        if payload.get(field):
            payload[field] = super().decode(payload[field])
    return payload
def mget(self, keys):
    """Return a list holding ``self.get(key)`` for every key in *keys*."""
    values = []
    for key in keys:
        values.append(self.get(key))
    return values
def delete(self, key):
    """Delete document *key* from the configured index and doc type."""
    target = {
        'index': self.index,
        'doc_type': self.doc_type,
        'id': key,
    }
    self.server.delete(**target)
def _get_server(self):
    """Connect to the Elasticsearch server.

    HTTP credentials are only passed when both a username and a
    password are set.
    """
    if self.username and self.password:
        http_auth = (self.username, self.password)
    else:
        http_auth = None
    address = '{}:{}'.format(self.host, self.port)
    return elasticsearch.Elasticsearch(
        address,
        retry_on_timeout=self.es_retry_on_timeout,
        max_retries=self.es_max_retries,
        timeout=self.es_timeout,
        scheme=self.scheme,
        http_auth=http_auth,
    )
@property
def server(self):
    """Return the cached server object, connecting on first access."""
    cached = self._server
    if cached is None:
        cached = self._server = self._get_server()
    return cached

View File

@@ -0,0 +1,112 @@
"""File-system result store backend."""
import locale
import os
from datetime import datetime
from kombu.utils.encoding import ensure_bytes
from celery import uuid
from celery.backends.base import KeyValueStoreBackend
from celery.exceptions import ImproperlyConfigured
default_encoding = locale.getpreferredencoding(False)
E_NO_PATH_SET = 'You need to configure a path for the file-system backend'
E_PATH_NON_CONFORMING_SCHEME = (
'A path for the file-system backend should conform to the file URI scheme'
)
E_PATH_INVALID = """\
The configured path for the file-system backend does not
work correctly, please make sure that it exists and has
the correct permissions.\
"""
class FilesystemBackend(KeyValueStoreBackend):
"""File-system result backend.
Arguments:
url (str): URL to the directory we should use
open (Callable): open function to use when opening files
unlink (Callable): unlink function to use when deleting files
sep (str): directory separator (to join the directory with the key)
encoding (str): encoding used on the file-system
"""
def __init__(self, url=None, open=open, unlink=os.unlink, sep=os.sep,
             encoding=default_encoding, *args, **kwargs):
    """Set up the backend for the directory named by *url*.

    The directory path and separator are kept as bytes encoded with
    *encoding*.  A probe entry is written, read and deleted at the end
    to check that the directory is usable.
    """
    super().__init__(*args, **kwargs)
    self.url = url
    directory = self._find_path(url)

    # Remove forwarding "/" for Windows os
    if os.name == "nt" and directory.startswith("/"):
        directory = directory[1:]

    # We need the path and separator as bytes objects
    self.path = directory.encode(encoding)
    self.sep = sep.encode(encoding)

    self.open = open
    self.unlink = unlink

    # Lets verify that we've everything setup right
    probe_key = b'.fs-backend-' + uuid().encode(encoding)
    self._do_directory_test(probe_key)
def __reduce__(self, args=(), kwargs=None):
    """Add this backend's ``url`` to the parent's reduce arguments."""
    merged = dict(kwargs) if kwargs else {}
    merged['url'] = self.url
    return super().__reduce__(args, merged)
def _find_path(self, url):
if not url:
raise ImproperlyConfigured(E_NO_PATH_SET)
if url.startswith('file://localhost/'):
return url[16:]
if url.startswith('file://'):
return url[7:]
raise ImproperlyConfigured(E_PATH_NON_CONFORMING_SCHEME)
def _do_directory_test(self, key):
try:
self.set(key, b'test value')
assert self.get(key) == b'test value'
self.delete(key)
except OSError:
raise ImproperlyConfigured(E_PATH_INVALID)
def _filename(self, key):
return self.sep.join((self.path, key))
def get(self, key):
    """Return the bytes stored in the file for *key*.

    Returns ``None`` when ``FileNotFoundError`` is raised.
    """
    target = self._filename(key)
    try:
        with self.open(target, 'rb') as infile:
            content = infile.read()
    except FileNotFoundError:
        return None
    return content
def set(self, key, value):
    """Write *value*, converted with ``ensure_bytes``, to the file for *key*."""
    target = self._filename(key)
    with self.open(target, 'wb') as outfile:
        payload = ensure_bytes(value)
        outfile.write(payload)
def mget(self, keys):
    """Yield ``self.get(key)`` for each key in *keys*, lazily."""
    yield from (self.get(key) for key in keys)
def delete(self, key):
    """Remove the file for *key* using the configured unlink function."""
    target = self._filename(key)
    self.unlink(target)
def cleanup(self):
"""Delete expired meta-data."""
if not self.expires:
return
epoch = datetime(1970, 1, 1, tzinfo=self.app.timezone)
now_ts = (self.app.now() - epoch).total_seconds()
cutoff_ts = now_ts - self.expires
for filename in os.listdir(self.path):
for prefix in (self.task_keyprefix, self.group_keyprefix,
self.chord_keyprefix):
if filename.startswith(prefix):
path = os.path.join(self.path, filename)
if os.stat(path).st_mtime < cutoff_ts:
self.unlink(path)
break

View File

@@ -0,0 +1,333 @@
"""MongoDB result store backend."""
from datetime import datetime, timedelta
from kombu.exceptions import EncodeError
from kombu.utils.objects import cached_property
from kombu.utils.url import maybe_sanitize_url, urlparse
from celery import states
from celery.exceptions import ImproperlyConfigured
from .base import BaseBackend
# pymongo is optional: fall back to ``None`` so importing this module never
# fails; MongoBackend.__init__ raises ImproperlyConfigured when it is missing.
try:
import pymongo
except ImportError:
pymongo = None
if pymongo:
# ``Binary`` is taken from ``bson.binary`` when importable, otherwise from
# ``pymongo.binary``.
try:
from bson.binary import Binary
except ImportError:
from pymongo.binary import Binary
from pymongo.errors import InvalidDocument
else: # pragma: no cover
Binary = None
# Placeholder so ``except InvalidDocument`` still resolves without pymongo.
class InvalidDocument(Exception):
pass
__all__ = ('MongoBackend',)
# Serializers whose encoded payload MongoBackend.encode wraps in ``Binary``.
BINARY_CODECS = frozenset(['pickle', 'msgpack'])
class MongoBackend(BaseBackend):
"""MongoDB result backend.
Raises:
celery.exceptions.ImproperlyConfigured:
if module :pypi:`pymongo` is not available.
"""
# Host list built from the backend URL (or the ``mongo_host`` setting);
# when set, _get_connection uses it instead of ``host``/``port``.
mongo_host = None
# Fallback host and port used by _get_connection when ``mongo_host`` is empty.
host = 'localhost'
port = 27017
# Credentials passed to the client as ``username``/``password`` when set.
user = None
password = None
# Database and collection names used for task and group meta-data.
database_name = 'celery'
taskmeta_collection = 'celery_taskmeta'
groupmeta_collection = 'celery_groupmeta'
# Default pool size placed into the client options.
max_pool_size = 10
# Extra client keyword options; replaced by a fresh dict in __init__.
options = None
supports_autoexpire = False
# Client cache filled on first use by _get_connection.
_connection = None
def __init__(self, app=None, **kwargs):
"""Configure the backend from the result URL and the app settings.

Settings are layered in this order: client option defaults, values parsed
from ``self.url`` (only when a URL is set), then the
``mongodb_backend_settings`` dict from the app configuration.

Raises:
ImproperlyConfigured: if :pypi:`pymongo` is not installed, or if
``mongodb_backend_settings`` is set but is not a dict.
"""
self.options = {}
super().__init__(app, **kwargs)
if not pymongo:
raise ImproperlyConfigured(
'You need to install the pymongo library to use the '
'MongoDB backend.')
# Set option defaults
for key, value in self._prepare_client_options().items():
self.options.setdefault(key, value)
# update conf with mongo uri data, only if uri was given
if self.url:
self.url = self._ensure_mongodb_uri_compliance(self.url)
uri_data = pymongo.uri_parser.parse_uri(self.url)
# build the hosts list to create a mongo connection
hostslist = [
f'{x[0]}:{x[1]}' for x in uri_data['nodelist']
]
self.user = uri_data['username']
self.password = uri_data['password']
self.mongo_host = hostslist
if uri_data['database']:
# if no database is provided in the uri, use default
self.database_name = uri_data['database']
self.options.update(uri_data['options'])
# update conf with specific settings
config = self.app.conf.get('mongodb_backend_settings')
if config is not None:
if not isinstance(config, dict):
raise ImproperlyConfigured(
'MongoDB backend settings should be grouped in a dict')
config = dict(config) # don't modify original
if 'host' in config or 'port' in config:
# these should take over uri conf
self.mongo_host = None
self.host = config.pop('host', self.host)
self.port = config.pop('port', self.port)
self.mongo_host = config.pop('mongo_host', self.mongo_host)
self.user = config.pop('user', self.user)
self.password = config.pop('password', self.password)
self.database_name = config.pop('database', self.database_name)
self.taskmeta_collection = config.pop(
'taskmeta_collection', self.taskmeta_collection,
)
self.groupmeta_collection = config.pop(
'groupmeta_collection', self.groupmeta_collection,
)
self.options.update(config.pop('options', {}))
# every key not consumed above is treated as a client option
self.options.update(config)
@staticmethod
def _ensure_mongodb_uri_compliance(url):
parsed_url = urlparse(url)
if not parsed_url.scheme.startswith('mongodb'):
url = f'mongodb+{url}'
if url == 'mongodb://':
url += 'localhost'
return url
def _prepare_client_options(self):
    """Return the default client options for the installed pymongo version."""
    if pymongo.version_tuple < (3,):  # pragma: no cover
        return {'max_pool_size': self.max_pool_size,
                'auto_start_request': False}
    return {'maxPoolSize': self.max_pool_size}
def _get_connection(self):
"""Connect to the MongoDB server."""
if self._connection is None:
from pymongo import MongoClient
host = self.mongo_host
if not host:
# The first pymongo.Connection() argument (host) can be
# a list of ['host:port'] elements or a mongodb connection
# URI. If this is the case, don't use self.port
# but let pymongo get the port(s) from the URI instead.
# This enables the use of replica sets and sharding.
# See pymongo.Connection() for more info.
host = self.host
if isinstance(host, str) \
and not host.startswith('mongodb://'):
host = f'mongodb://{host}:{self.port}'
# don't change self.options
conf = dict(self.options)
conf['host'] = host
if self.user:
conf['username'] = self.user
if self.password:
conf['password'] = self.password
self._connection = MongoClient(**conf)
return self._connection
def encode(self, data):
    """Serialize ``data`` for storage.

    With the ``bson`` serializer the data is returned untouched; for
    serializers listed in ``BINARY_CODECS`` the encoded payload is wrapped
    in ``Binary``.
    """
    if self.serializer == 'bson':
        # mongodb handles serialization
        return data
    encoded = super().encode(data)
    # binary serializer output has to be wrapped before it is stored
    return Binary(encoded) if self.serializer in BINARY_CODECS else encoded
def decode(self, data):
    """Deserialize stored ``data``; with the ``bson`` serializer it is returned as-is."""
    return data if self.serializer == 'bson' else super().decode(data)
def _store_result(self, task_id, result, state,
traceback=None, request=None, **kwargs):
"""Store return value and state of an executed task."""
meta = self._get_result_meta(result=self.encode(result), state=state,
traceback=traceback, request=request,
format_date=False)
# Add the _id for mongodb
meta['_id'] = task_id
try:
self.collection.replace_one({'_id': task_id}, meta, upsert=True)
except InvalidDocument as exc:
raise EncodeError(exc)
return result
def _get_task_meta_for(self, task_id):
"""Get task meta-data for a task by id."""
obj = self.collection.find_one({'_id': task_id})
if obj:
if self.app.conf.find_value_for_key('extended', 'result'):
return self.meta_from_decoded({
'name': obj['name'],
'args': obj['args'],
'task_id': obj['_id'],
'queue': obj['queue'],
'kwargs': obj['kwargs'],
'status': obj['status'],
'worker': obj['worker'],
'retries': obj['retries'],
'children': obj['children'],
'date_done': obj['date_done'],
'traceback': obj['traceback'],
'result': self.decode(obj['result']),
})
return self.meta_from_decoded({
'task_id': obj['_id'],
'status': obj['status'],
'result': self.decode(obj['result']),
'date_done': obj['date_done'],
'traceback': obj['traceback'],
'children': obj['children'],
})
return {'status': states.PENDING, 'result': None}
def _save_group(self, group_id, result):
"""Save the group result."""
meta = {
'_id': group_id,
'result': self.encode([i.id for i in result]),
'date_done': datetime.utcnow(),
}
self.group_collection.replace_one({'_id': group_id}, meta, upsert=True)
return result
def _restore_group(self, group_id):
"""Get the result for a group by id."""
obj = self.group_collection.find_one({'_id': group_id})
if obj:
return {
'task_id': obj['_id'],
'date_done': obj['date_done'],
'result': [
self.app.AsyncResult(task)
for task in self.decode(obj['result'])
],
}
def _delete_group(self, group_id):
"""Delete a group by id."""
self.group_collection.delete_one({'_id': group_id})
def _forget(self, task_id):
"""Remove result from MongoDB.
Raises:
pymongo.exceptions.OperationsError:
if the task_id could not be removed.
"""
# By using safe=True, this will wait until it receives a response from
# the server. Likewise, it will raise an OperationsError if the
# response was unable to be completed.
self.collection.delete_one({'_id': task_id})
def cleanup(self):
    """Delete expired meta-data.

    Removes task and group documents whose ``date_done`` is older than
    ``self.expires_delta``; does nothing when no expiry is configured.
    """
    if not self.expires:
        return
    for attr in ('collection', 'group_collection'):
        store = getattr(self, attr)
        store.delete_many(
            {'date_done': {'$lt': self.app.now() - self.expires_delta}},
        )
def __reduce__(self, args=(), kwargs=None):
    """Support pickling: forward ``expires`` and ``url`` to the parent reducer."""
    merged = dict(kwargs or {}, expires=self.expires, url=self.url)
    return super().__reduce__(args, merged)
def _get_database(self):
conn = self._get_connection()
return conn[self.database_name]
@cached_property
def database(self):
    """Get database from MongoDB connection.

    Evaluated through ``cached_property``; delegates to ``_get_database``.
    """
    handle = self._get_database()
    return handle
@cached_property
def collection(self):
    """Get the meta-data task collection."""
    tasks = self.database[self.taskmeta_collection]
    # cleanup() filters on ``date_done``: request an index on it, built
    # in the background
    tasks.create_index('date_done', background=True)
    return tasks
@cached_property
def group_collection(self):
    """Get the meta-data group collection."""
    groups = self.database[self.groupmeta_collection]
    # cleanup() filters on ``date_done``: request an index on it, built
    # in the background
    groups.create_index('date_done', background=True)
    return groups
@cached_property
def expires_delta(self):
    """Return ``self.expires`` (seconds) as a ``timedelta``."""
    seconds = self.expires
    return timedelta(seconds=seconds)
def as_uri(self, include_password=False):
    """Return the backend as an URI.

    Arguments:
        include_password (bool): Password censored if disabled.
    """
    url = self.url
    if not url:
        return 'mongodb://'
    if include_password:
        return url
    first_part, comma, remainder = url.partition(',')
    if not comma:
        return maybe_sanitize_url(url)
    # only the part before the first comma goes through the sanitizer
    return ','.join([maybe_sanitize_url(first_part), remainder])

View File

@@ -0,0 +1,668 @@
"""Redis result store backend."""
import time
from contextlib import contextmanager
from functools import partial
from ssl import CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED
from urllib.parse import unquote
from kombu.utils.functional import retry_over_time
from kombu.utils.objects import cached_property
from kombu.utils.url import _parse_url, maybe_sanitize_url
from celery import states
from celery._state import task_join_will_block
from celery.canvas import maybe_signature
from celery.exceptions import BackendStoreError, ChordError, ImproperlyConfigured
from celery.result import GroupResult, allow_join_result
from celery.utils.functional import _regen, dictfilter
from celery.utils.log import get_logger
from celery.utils.time import humanize_seconds
from .asynchronous import AsyncBackendMixin, BaseResultConsumer
from .base import BaseKeyValueStoreBackend
try:
import redis.connection
from kombu.transport.redis import get_redis_error_classes
except ImportError:
redis = None
get_redis_error_classes = None
try:
import redis.sentinel
except ImportError:
pass
__all__ = ('RedisBackend', 'SentinelBackend')
# Raised as ImproperlyConfigured by RedisBackend.__init__ when :pypi:`redis`
# cannot be imported.
E_REDIS_MISSING = """
You need to install the redis library in order to use \
the Redis result store backend.
"""
# Raised as ImproperlyConfigured by SentinelBackend.__init__ when
# ``redis.sentinel`` is unavailable.
E_REDIS_SENTINEL_MISSING = """
You need to install the redis library with support of \
sentinel in order to use the Redis result store backend.
"""
# Logged as a warning when ``ssl_cert_reqs`` resolves to CERT_OPTIONAL.
W_REDIS_SSL_CERT_OPTIONAL = """
Setting ssl_cert_reqs=CERT_OPTIONAL when connecting to redis means that \
celery might not validate the identity of the redis broker when connecting. \
This leaves you vulnerable to man in the middle attacks.
"""
# Logged as a warning when ``ssl_cert_reqs`` resolves to CERT_NONE.
W_REDIS_SSL_CERT_NONE = """
Setting ssl_cert_reqs=CERT_NONE when connecting to redis means that celery \
will not validate the identity of the redis broker when connecting. This \
leaves you vulnerable to man in the middle attacks.
"""
# Raised as ValueError when SSL parameters accompany a ``redis://`` URL.
E_REDIS_SSL_PARAMS_AND_SCHEME_MISMATCH = """
SSL connection parameters have been provided but the specified URL scheme \
is redis://. A Redis SSL connection URL should use the scheme rediss://.
"""
# Raised as ValueError when an SSL connection has no valid ``ssl_cert_reqs``.
E_REDIS_SSL_CERT_REQS_MISSING_INVALID = """
A rediss:// URL must have parameter ssl_cert_reqs and this must be set to \
CERT_REQUIRED, CERT_OPTIONAL, or CERT_NONE
"""
# Log format for a lost connection: retry number, retry limit, delay text.
E_LOST = 'Connection to Redis lost: Retry (%s/%s) %s.'
# Logged as critical when reconnecting the result consumer keeps failing.
E_RETRY_LIMIT_EXCEEDED = """
Retry limit exceeded while trying to reconnect to the Celery redis result \
store backend. The Celery application must be restarted.
"""
logger = get_logger(__name__)
class ResultConsumer(BaseResultConsumer):
"""Result consumer that receives task state changes over Redis pub/sub."""
# Pub/sub handle; None until start() or _reconnect_pubsub() creates one.
_pubsub = None
def __init__(self, *args, **kwargs):
    """Initialise the consumer and keep shortcuts to backend helpers."""
    super().__init__(*args, **kwargs)
    backend = self.backend
    self._get_key_for_task = backend.get_key_for_task
    self._decode_result = backend.decode_result
    self._ensure = backend.ensure
    self._connection_errors = backend.connection_errors
    # keys this consumer is currently subscribed to
    self.subscribed_to = set()
def on_after_fork(self):
    """Reset the client connection pool and close pub/sub after a fork.

    A ``KeyError`` raised while doing so is logged as a warning instead of
    propagating.
    """
    try:
        self.backend.client.connection_pool.reset()
        pubsub = self._pubsub
        if pubsub is not None:
            pubsub.close()
    except KeyError as exc:
        logger.warning(str(exc))
    super().on_after_fork()
def _reconnect_pubsub(self):
self._pubsub = None
self.backend.client.connection_pool.reset()
# task state might have changed when the connection was down so we
# retrieve meta for all subscribed tasks before going into pubsub mode
if self.subscribed_to:
metas = self.backend.client.mget(self.subscribed_to)
metas = [meta for meta in metas if meta]
for meta in metas:
self.on_state_change(self._decode_result(meta), None)
self._pubsub = self.backend.client.pubsub(
ignore_subscribe_messages=True,
)
# subscribed_to maybe empty after on_state_change
if self.subscribed_to:
self._pubsub.subscribe(*self.subscribed_to)
else:
self._pubsub.connection = self._pubsub.connection_pool.get_connection(
'pubsub', self._pubsub.shard_hint
)
# even if there is nothing to subscribe, we should not lose the callback after connecting.
# The on_connect callback will re-subscribe to any channels we previously subscribed to.
self._pubsub.connection.register_connect_callback(self._pubsub.on_connect)
@contextmanager
def reconnect_on_error(self):
    """Context manager that reconnects pub/sub on connection errors.

    A connection error raised inside the ``with`` body triggers
    ``_reconnect_pubsub`` through ``self._ensure``.  If that fails with a
    connection error as well, the failure is logged as critical and
    re-raised.
    """
    try:
        yield
    except self._connection_errors:
        reconnect = self._reconnect_pubsub
        try:
            self._ensure(reconnect, ())
        except self._connection_errors:
            logger.critical(E_RETRY_LIMIT_EXCEEDED)
            raise
def _maybe_cancel_ready_task(self, meta):
    """Cancel the subscription for a task whose status is a ready state."""
    if meta['status'] not in states.READY_STATES:
        return
    self.cancel_for(meta['task_id'])
def on_state_change(self, meta, message):
    """Run the parent handler, then drop the subscription if the task is ready."""
    super().on_state_change(meta, message)
    # no more updates are expected once the task reached a ready state
    self._maybe_cancel_ready_task(meta)
def start(self, initial_task_id, **kwargs):
    """Create the pub/sub handle and subscribe to the first task."""
    client = self.backend.client
    self._pubsub = client.pubsub(ignore_subscribe_messages=True)
    self._consume_from(initial_task_id)
def on_wait_for_pending(self, result, **kwargs):
    """Pass every non-None meta yielded by ``result._iter_meta`` to on_state_change."""
    available = (meta for meta in result._iter_meta(**kwargs)
                 if meta is not None)
    for meta in available:
        self.on_state_change(meta, None)
def stop(self):
    """Close the pub/sub handle, if there is one."""
    pubsub = self._pubsub
    if pubsub is None:
        return
    pubsub.close()
def drain_events(self, timeout=None):
    """Fetch one pub/sub message and dispatch it; sleep when not connected.

    Without a pub/sub handle the call just sleeps for ``timeout`` seconds
    (when a timeout is given).
    """
    if not self._pubsub:
        if timeout:
            time.sleep(timeout)
        return
    with self.reconnect_on_error():
        message = self._pubsub.get_message(timeout=timeout)
        if message and message['type'] == 'message':
            decoded = self._decode_result(message['data'])
            self.on_state_change(decoded, message)
def consume_from(self, task_id):
    """Subscribe to ``task_id``, starting the consumer first if necessary."""
    if self._pubsub is not None:
        self._consume_from(task_id)
        return None
    return self.start(task_id)
def _consume_from(self, task_id):
key = self._get_key_for_task(task_id)
if key not in self.subscribed_to:
self.subscribed_to.add(key)
with self.reconnect_on_error():
self._pubsub.subscribe(key)
def cancel_for(self, task_id):
    """Forget the key of ``task_id`` and unsubscribe from it when connected."""
    key = self._get_key_for_task(task_id)
    self.subscribed_to.discard(key)
    if not self._pubsub:
        return
    with self.reconnect_on_error():
        self._pubsub.unsubscribe(key)
class RedisBackend(BaseKeyValueStoreBackend, AsyncBackendMixin):
"""Redis task result store.
It makes use of the following commands:
GET, MGET, DEL, INCRBY, EXPIRE, SET, SETEX
"""
# Consumer class instantiated in __init__ as ``self.result_consumer``.
ResultConsumer = ResultConsumer
#: :pypi:`redis` client module.
redis = redis
# Connection class installed when ``redis_backend_use_ssl`` is configured;
# None when the redis library is missing.
connection_class_ssl = redis.SSLConnection if redis else None
#: Maximum number of connections in the pool.
max_connections = None
supports_autoexpire = True
supports_native_join = True
#: Maximal length of string value in Redis.
#: 512 MB - https://redis.io/topics/data-types
_MAX_STR_VALUE_SIZE = 536870912
def __init__(self, host=None, port=None, db=None, password=None,
max_connections=None, url=None,
connection_pool=None, **kwargs):
"""Build the connection parameters and the result consumer.

Connection parameters start from the ``redis_*`` app settings, are
extended with ``redis_backend_use_ssl`` when set, and are finally merged
with the values parsed from ``url`` (a ``host`` containing ``://`` is
treated as the URL).

Raises:
ImproperlyConfigured: if the redis library is not installed.
ValueError: if an SSL connection is configured without a valid
``ssl_cert_reqs`` value.
"""
super().__init__(expires_type=int, **kwargs)
_get = self.app.conf.get
if self.redis is None:
raise ImproperlyConfigured(E_REDIS_MISSING.strip())
# a ``host`` argument that is really a URL takes the place of ``url``
if host and '://' in host:
url, host = host, None
self.max_connections = (
max_connections or
_get('redis_max_connections') or
self.max_connections)
self._ConnectionPool = connection_pool
socket_timeout = _get('redis_socket_timeout')
socket_connect_timeout = _get('redis_socket_connect_timeout')
retry_on_timeout = _get('redis_retry_on_timeout')
socket_keepalive = _get('redis_socket_keepalive')
health_check_interval = _get('redis_backend_health_check_interval')
self.connparams = {
'host': _get('redis_host') or 'localhost',
'port': _get('redis_port') or 6379,
'db': _get('redis_db') or 0,
'password': _get('redis_password'),
'max_connections': self.max_connections,
'socket_timeout': socket_timeout and float(socket_timeout),
'retry_on_timeout': retry_on_timeout or False,
'socket_connect_timeout':
socket_connect_timeout and float(socket_connect_timeout),
}
username = _get('redis_username')
if username:
# We're extra careful to avoid including this configuration value
# if it wasn't specified since older versions of py-redis
# don't support specifying a username.
# Only Redis>6.0 supports username/password authentication.
# TODO: Include this in connparams' definition once we drop
# support for py-redis<3.4.0.
self.connparams['username'] = username
if health_check_interval:
self.connparams["health_check_interval"] = health_check_interval
# absent in redis.connection.UnixDomainSocketConnection
if socket_keepalive:
self.connparams['socket_keepalive'] = socket_keepalive
# "redis_backend_use_ssl" must be a dict with the keys:
# 'ssl_cert_reqs', 'ssl_ca_certs', 'ssl_certfile', 'ssl_keyfile'
# (the same as "broker_use_ssl")
ssl = _get('redis_backend_use_ssl')
if ssl:
self.connparams.update(ssl)
self.connparams['connection_class'] = self.connection_class_ssl
if url:
self.connparams = self._params_from_url(url, self.connparams)
# If we've received SSL parameters via query string or the
# redis_backend_use_ssl dict, check ssl_cert_reqs is valid. If set
# via query string ssl_cert_reqs will be a string so convert it here
if ('connection_class' in self.connparams and
issubclass(self.connparams['connection_class'], redis.SSLConnection)):
# sentinel that is not a valid constant, so a missing value is rejected below
ssl_cert_reqs_missing = 'MISSING'
ssl_string_to_constant = {'CERT_REQUIRED': CERT_REQUIRED,
'CERT_OPTIONAL': CERT_OPTIONAL,
'CERT_NONE': CERT_NONE,
'required': CERT_REQUIRED,
'optional': CERT_OPTIONAL,
'none': CERT_NONE}
ssl_cert_reqs = self.connparams.get('ssl_cert_reqs', ssl_cert_reqs_missing)
ssl_cert_reqs = ssl_string_to_constant.get(ssl_cert_reqs, ssl_cert_reqs)
if ssl_cert_reqs not in ssl_string_to_constant.values():
raise ValueError(E_REDIS_SSL_CERT_REQS_MISSING_INVALID)
if ssl_cert_reqs == CERT_OPTIONAL:
logger.warning(W_REDIS_SSL_CERT_OPTIONAL)
elif ssl_cert_reqs == CERT_NONE:
logger.warning(W_REDIS_SSL_CERT_NONE)
self.connparams['ssl_cert_reqs'] = ssl_cert_reqs
self.url = url
# without kombu's helper no exception classes are treated as connection errors
self.connection_errors, self.channel_errors = (
get_redis_error_classes() if get_redis_error_classes
else ((), ()))
self.result_consumer = self.ResultConsumer(
self, self.app, self.accept,
self._pending_results, self._pending_messages,
)
def _params_from_url(self, url, defaults):
    """Merge connection parameters parsed from ``url`` over ``defaults``.

    Arguments:
        url (str): Backend URL (``redis://``, ``rediss://`` or
            ``socket://``).
        defaults (dict): Base connection parameters; not modified.

    Returns:
        dict: The combined connection parameters.  Query-string arguments
        override everything else.

    Raises:
        ValueError: if SSL parameters are combined with a ``redis://`` URL.
    """
    scheme, host, port, username, password, path, query = _parse_url(url)
    connparams = dict(
        defaults, **dictfilter({
            'host': host, 'port': port, 'username': username,
            'password': password, 'db': query.pop('virtual_host', None)})
    )
    if scheme == 'socket':
        # use 'path' as path to the socket… in this case
        # the database number should be given in 'query'
        connparams.update({
            'connection_class': self.redis.UnixDomainSocketConnection,
            'path': '/' + path,
        })
        # host+port are invalid options when using this connection type.
        connparams.pop('host', None)
        connparams.pop('port', None)
        # Tolerate ``defaults`` that never contained the key, like the
        # two pops above, instead of raising KeyError.
        connparams.pop('socket_connect_timeout', None)
    else:
        connparams['db'] = path
    ssl_param_keys = ['ssl_ca_certs', 'ssl_certfile', 'ssl_keyfile',
                      'ssl_cert_reqs']
    if scheme == 'redis':
        # If connparams or query string contain ssl params, raise error
        if (any(key in connparams for key in ssl_param_keys) or
                any(key in query for key in ssl_param_keys)):
            raise ValueError(E_REDIS_SSL_PARAMS_AND_SCHEME_MISMATCH)
    if scheme == 'rediss':
        connparams['connection_class'] = redis.SSLConnection
        # The following parameters, if present in the URL, are encoded. We
        # must add the decoded values to connparams.
        for ssl_setting in ssl_param_keys:
            ssl_val = query.pop(ssl_setting, None)
            if ssl_val:
                connparams[ssl_setting] = unquote(ssl_val)
    # db may be string and start with / like in kombu.
    db = connparams.get('db') or 0
    db = db.strip('/') if isinstance(db, str) else db
    connparams['db'] = int(db)
    # Convert recognised query arguments with redis-py's own parsers; only
    # values of existing keys are replaced, so iterating is safe.
    for key, value in query.items():
        if key in redis.connection.URL_QUERY_ARGUMENT_PARSERS:
            query[key] = redis.connection.URL_QUERY_ARGUMENT_PARSERS[key](
                value
            )
    # Query parameters override other parameters
    connparams.update(query)
    return connparams
@cached_property
def retry_policy(self):
    """Parent retry policy, updated with the ``retry_policy`` transport option."""
    policy = super().retry_policy
    options = self._transport_options
    if "retry_policy" not in options:
        return policy
    # copy first so the parent's policy is left untouched
    merged = policy.copy()
    merged.update(options['retry_policy'])
    return merged
def on_task_call(self, producer, task_id):
    """Start consuming results for ``task_id`` unless joining would block."""
    if task_join_will_block():
        return
    self.result_consumer.consume_from(task_id)
def get(self, key):
    """Return the client's GET result for ``key``."""
    client = self.client
    return client.get(key)
def mget(self, keys):
    """Return the client's MGET result for ``keys``."""
    client = self.client
    return client.mget(keys)
def ensure(self, fun, args, **policy):
    """Call ``fun(*args)`` through ``retry_over_time`` on connection errors.

    ``policy`` overrides entries of ``self.retry_policy`` for this call.
    """
    effective_policy = {**self.retry_policy, **policy}
    on_error = partial(
        self.on_connection_error, effective_policy.get('max_retries'))
    return retry_over_time(
        fun, self.connection_errors, args, {}, on_error,
        **effective_policy)
def on_connection_error(self, max_retries, exc, intervals, retries):
    """Log the lost connection and return the next value of ``intervals``."""
    delay = next(intervals)
    logger.error(
        E_LOST.strip(),
        retries,
        max_retries or 'Inf',
        humanize_seconds(delay, 'in '),
    )
    return delay
def set(self, key, value, **retry_policy):
    """Store ``value`` under ``key``, retrying on connection errors.

    Arguments:
        key: Redis key to write.
        value: Payload to store.
        **retry_policy: Overrides for the retry policy, passed to
            :meth:`ensure`.

    Raises:
        BackendStoreError: if a text or binary ``value`` is longer than
            ``_MAX_STR_VALUE_SIZE``.
    """
    # Serializers may produce text or bytes: apply the size limit to both
    # so that oversized binary payloads are rejected up front as well.
    if (isinstance(value, (str, bytes, bytearray))
            and len(value) > self._MAX_STR_VALUE_SIZE):
        raise BackendStoreError('value too large for Redis backend')
    return self.ensure(self._set, (key, value), **retry_policy)
def _set(self, key, value):
with self.client.pipeline() as pipe:
if self.expires:
pipe.setex(key, self.expires, value)
else:
pipe.set(key, value)
pipe.publish(key, value)
pipe.execute()
def forget(self, task_id):
    """Forget ``task_id`` via the parent class and cancel its subscription."""
    super().forget(task_id)
    consumer = self.result_consumer
    consumer.cancel_for(task_id)
def delete(self, key):
    """Delete ``key`` through the client; nothing is returned."""
    client = self.client
    client.delete(key)
def incr(self, key):
    """Return the client's INCR result for ``key``."""
    client = self.client
    return client.incr(key)
def expire(self, key, value):
    """Return the client's EXPIRE result for ``key`` with timeout ``value``."""
    client = self.client
    return client.expire(key, value)
def add_to_chord(self, group_id, result):
    """Increment the ``.t`` counter key of the group by one."""
    counter_key = self.get_key_for_group(group_id, '.t')
    self.client.incr(counter_key, 1)
def _unpack_chord_result(self, tup, decode,
                         EXCEPTION_STATES=states.EXCEPTION_STATES,
                         PROPAGATE_STATES=states.PROPAGATE_STATES):
    """Decode one stored chord part and return its value.

    Raises:
        ChordError: if the part's state is one of ``PROPAGATE_STATES``.
    """
    _, task_id, state, value = decode(tup)
    if state in EXCEPTION_STATES:
        # stored exceptions are converted back into exception objects
        value = self.exception_to_python(value)
    if state in PROPAGATE_STATES:
        raise ChordError(f'Dependency {task_id} raised {value!r}')
    return value
def set_chord_size(self, group_id, chord_size):
    """Store ``chord_size`` under the ``.s`` key of the group."""
    size_key = self.get_key_for_group(group_id, '.s')
    self.set(size_key, chord_size)
def apply_chord(self, header_result_args, body, **kwargs):
    """Save the chord header result when it contains nested group results.

    Nothing is saved when the second header argument is a ``_regen`` or
    when none of the header results is a ``GroupResult``.
    """
    # If any of the child results of this chord are complex (ie. group
    # results themselves), we need to save `header_result` to ensure that
    # the expected structure is retained when we finish the chord and pass
    # the results onward to the body in `on_chord_part_return()`. We don't
    # do this in all cases to retain an optimisation in the common case
    # where a chord header is comprised of simple result objects.
    if isinstance(header_result_args[1], _regen):
        return
    header_result = self.app.GroupResult(*header_result_args)
    has_nested_group = any(
        isinstance(child, GroupResult) for child in header_result.results
    )
    if has_nested_group:
        header_result.save(backend=self)
@cached_property
def _chord_zset(self):
return self._transport_options.get('result_chord_ordered', True)
@cached_property
def _transport_options(self):
return self.app.conf.get('result_backend_transport_options', {})
def on_chord_part_return(self, request, state, result,
propagate=None, **kwargs):
"""Record one finished chord header task and fire the body when complete.

The encoded result is appended under the group's ``.j`` key; the ``.t``
and ``.s`` keys are read in the same pipeline.  When the number of stored
parts equals the chord size plus the ``.t`` counter, the header results
are collected, the chord callback is invoked with them, and the three
keys are deleted.  Returns early when the request has no id or no group.
"""
app = self.app
tid, gid, group_index = request.id, request.group, request.group_index
if not gid or not tid:
return
# a missing group index is replaced by '+inf' before being used as score
if group_index is None:
group_index = '+inf'
client = self.client
jkey = self.get_key_for_group(gid, '.j')
tkey = self.get_key_for_group(gid, '.t')
skey = self.get_key_for_group(gid, '.s')
result = self.encode_result(result, state)
encoded = self.encode([1, tid, state, result])
with client.pipeline() as pipe:
pipeline = (
pipe.zadd(jkey, {encoded: group_index}).zcount(jkey, "-inf", "+inf")
if self._chord_zset
else pipe.rpush(jkey, encoded).llen(jkey)
).get(tkey).get(skey)
if self.expires:
pipeline = pipeline \
.expire(jkey, self.expires) \
.expire(tkey, self.expires) \
.expire(skey, self.expires)
# first four replies: add/push result, part count, ``.t`` value, ``.s`` value
_, readycount, totaldiff, chord_size_bytes = pipeline.execute()[:4]
totaldiff = int(totaldiff or 0)
if chord_size_bytes:
try:
# NOTE(review): ``callback`` is bound inside this ``try``; if
# maybe_signature() raises, the handlers below reference an unbound name.
callback = maybe_signature(request.chord, app=app)
total = int(chord_size_bytes) + totaldiff
if readycount == total:
header_result = GroupResult.restore(gid)
if header_result is not None:
# If we manage to restore a `GroupResult`, then it must
# have been complex and saved by `apply_chord()` earlier.
#
# Before we can join the `GroupResult`, it needs to be
# manually marked as ready to avoid blocking
header_result.on_ready()
# We'll `join()` it to get the results and ensure they are
# structured as intended rather than the flattened version
# we'd construct without any other information.
join_func = (
header_result.join_native
if header_result.supports_native_join
else header_result.join
)
with allow_join_result():
resl = join_func(
timeout=app.conf.result_chord_join_timeout,
propagate=True
)
else:
# Otherwise simply extract and decode the results we
# stashed along the way, which should be faster for large
# numbers of simple results in the chord header.
decode, unpack = self.decode, self._unpack_chord_result
with client.pipeline() as pipe:
if self._chord_zset:
pipeline = pipe.zrange(jkey, 0, -1)
else:
pipeline = pipe.lrange(jkey, 0, total)
resl, = pipeline.execute()
resl = [unpack(tup, decode) for tup in resl]
try:
callback.delay(resl)
except Exception as exc: # pylint: disable=broad-except
logger.exception(
'Chord callback for %r raised: %r', request.group, exc)
return self.chord_error_from_stack(
callback,
ChordError(f'Callback error: {exc!r}'),
)
finally:
# the bookkeeping keys are removed whether or not the callback succeeded
with client.pipeline() as pipe:
pipe \
.delete(jkey) \
.delete(tkey) \
.delete(skey) \
.execute()
except ChordError as exc:
logger.exception('Chord %r raised: %r', request.group, exc)
return self.chord_error_from_stack(callback, exc)
except Exception as exc: # pylint: disable=broad-except
logger.exception('Chord %r raised: %r', request.group, exc)
return self.chord_error_from_stack(
callback,
ChordError(f'Join error: {exc!r}'),
)
def _create_client(self, **params):
return self._get_client()(
connection_pool=self._get_pool(**params),
)
def _get_client(self):
return self.redis.StrictRedis
def _get_pool(self, **params):
return self.ConnectionPool(**params)
@property
def ConnectionPool(self):
    """Connection pool class; defaults to ``redis.ConnectionPool`` on first access."""
    pool_cls = self._ConnectionPool
    if pool_cls is None:
        pool_cls = self._ConnectionPool = self.redis.ConnectionPool
    return pool_cls
@cached_property
def client(self):
    """Client created from ``self.connparams``."""
    params = self.connparams
    return self._create_client(**params)
def __reduce__(self, args=(), kwargs=None):
    """Support pickling: forward ``expires`` and ``url`` to the parent reducer."""
    merged = dict(kwargs or {}, expires=self.expires, url=self.url)
    return super().__reduce__(args, merged)
# Only defined when ``redis.sentinel`` was imported successfully; the
# ``getattr`` default also covers ``redis`` being None.
if getattr(redis, "sentinel", None):
class SentinelManagedSSLConnection(
redis.sentinel.SentinelManagedConnection,
redis.SSLConnection):
"""Connect to a Redis server using Sentinel + TLS.
Use Sentinel to identify which Redis server is the current master
to connect to and when connecting to the Master server, use an
SSL Connection.
"""
class SentinelBackend(RedisBackend):
"""Redis sentinel task result store."""
# URL looks like `sentinel://0.0.0.0:26347/3;sentinel://0.0.0.0:26348/3`
_SERVER_URI_SEPARATOR = ";"
# ``redis.sentinel`` module, or None when it is unavailable (checked in __init__).
sentinel = getattr(redis, "sentinel", None)
# SSL connection class; the name is only evaluated when ``sentinel`` is set.
connection_class_ssl = SentinelManagedSSLConnection if sentinel else None
def __init__(self, *args, **kwargs):
    """Initialise the backend.

    Raises:
        ImproperlyConfigured: if ``redis.sentinel`` is not available.
    """
    sentinel_available = self.sentinel is not None
    if not sentinel_available:
        raise ImproperlyConfigured(E_REDIS_SENTINEL_MISSING.strip())
    super().__init__(*args, **kwargs)
def as_uri(self, include_password=False):
    """Return the server addresses as URIs, sanitizing the password or not."""
    # Allow superclass to do work if we don't need to force sanitization
    if include_password:
        return super().as_uri(include_password=include_password)
    # Otherwise every component has to be sanitized on its own, so each
    # one is passed separately to the `kombu` helper
    separator = self._SERVER_URI_SEPARATOR
    sanitized = []
    for chunk in (self.url or "").split(separator):
        uri = maybe_sanitize_url(chunk)
        # Similar to the superclass, strip the trailing slash from URIs
        # with all components empty other than the scheme
        sanitized.append(uri[:-1] if uri.endswith(":///") else uri)
    return separator.join(sanitized)
def _params_from_url(self, url, defaults):
    """Build connection parameters for a multi-server sentinel URL.

    Every server URI is parsed with the parent implementation and stored
    under ``hosts``; ``db`` and ``password`` are then taken from the first
    host when it provides them.
    """
    merged = dict(defaults, hosts=[])
    for server_url in url.split(self._SERVER_URI_SEPARATOR):
        host_params = super()._params_from_url(
            url=server_url, defaults=defaults)
        merged['hosts'].append(host_params)
    # per-server values now live in ``hosts``
    for per_host_key in ("host", "port", "db", "password"):
        merged.pop(per_host_key)
    # Adding db/password in connparams to connect to the correct instance
    hosts = merged['hosts']
    if hosts:
        first_host = hosts[0]
        for shared_key in ("db", "password"):
            if shared_key in first_host:
                merged[shared_key] = first_host.get(shared_key)
    return merged
def _get_sentinel_instance(self, **params):
connparams = params.copy()
hosts = connparams.pop("hosts")
min_other_sentinels = self._transport_options.get("min_other_sentinels", 0)
sentinel_kwargs = self._transport_options.get("sentinel_kwargs", {})
sentinel_instance = self.sentinel.Sentinel(
[(cp['host'], cp['port']) for cp in hosts],
min_other_sentinels=min_other_sentinels,
sentinel_kwargs=sentinel_kwargs,
**connparams)
return sentinel_instance
def _get_pool(self, **params):
sentinel_instance = self._get_sentinel_instance(**params)
master_name = self._transport_options.get("master_name", None)
return sentinel_instance.master_for(
service_name=master_name,
redis_class=self._get_client(),
).connection_pool

View File

@@ -0,0 +1,342 @@
"""The ``RPC`` result backend for AMQP brokers.
RPC-style result backend, using reply-to and one queue per client.
"""
import time
import kombu
from kombu.common import maybe_declare
from kombu.utils.compat import register_after_fork
from kombu.utils.objects import cached_property
from celery import states
from celery._state import current_task, task_join_will_block
from . import base
from .asynchronous import AsyncBackendMixin, BaseResultConsumer
__all__ = ('BacklogLimitExceeded', 'RPCBackend')
# Message of the NotImplementedError raised by RPCBackend.ensure_chords_allowed.
E_NO_CHORD_SUPPORT = """
The "rpc" result backend does not support chords!
Note that a group chained with a task is also upgraded to be a chord,
as this pattern requires synchronization.
Result backends that supports chords: Redis, Database, Memcached, and more.
"""
class BacklogLimitExceeded(Exception):
"""Too much state history to fast-forward.

Also exposed as ``RPCBackend.BacklogLimitExceeded``.
"""
def _on_after_fork_cleanup_backend(backend):
backend._after_fork()
class ResultConsumer(BaseResultConsumer):
"""Result consumer that reads task results from a kombu consumer."""
# Consumer class instantiated by start().
Consumer = kombu.Consumer
# Connection and consumer created by start(); reset to None in on_after_fork().
_connection = None
_consumer = None
def __init__(self, *args, **kwargs):
    """Initialise the consumer and keep a shortcut to the backend's binding factory."""
    super().__init__(*args, **kwargs)
    backend = self.backend
    self._create_binding = backend._create_binding
def start(self, initial_task_id, no_ack=True, **kwargs):
    """Open a connection and start consuming from the first task's queue."""
    connection = self._connection = self.app.connection()
    first_queue = self._create_binding(initial_task_id)
    consumer = self._consumer = self.Consumer(
        connection.default_channel,
        [first_queue],
        callbacks=[self.on_state_change],
        no_ack=no_ack,
        accept=self.accept,
    )
    consumer.consume()
def drain_events(self, timeout=None):
    """Drain events from the connection, or sleep when there is none.

    Without a connection the call sleeps for ``timeout`` seconds (when a
    timeout is given) and returns None.
    """
    connection = self._connection
    if not connection:
        if timeout:
            time.sleep(timeout)
        return None
    return connection.drain_events(timeout=timeout)
def stop(self):
    """Cancel the consumer and close the connection.

    Safe to call when the consumer was never started or has been reset by
    ``on_after_fork`` (both attributes are None then).  The connection is
    closed even if cancelling the consumer raises.
    """
    try:
        if self._consumer is not None:
            self._consumer.cancel()
    finally:
        if self._connection is not None:
            self._connection.close()
def on_after_fork(self):
    """Drop the consumer and collect the connection after a fork."""
    self._consumer = None
    connection = self._connection
    if connection is None:
        return
    connection.collect()
    self._connection = None
def consume_from(self, task_id):
    """Consume from the queue of ``task_id``, starting the consumer if needed."""
    consumer = self._consumer
    if consumer is None:
        return self.start(task_id)
    queue = self._create_binding(task_id)
    if consumer.consuming_from(queue):
        return None
    consumer.add_queue(queue)
    consumer.consume()
def cancel_for(self, task_id):
    """Stop consuming from the queue bound for ``task_id``, if consuming at all."""
    if not self._consumer:
        return
    queue_name = self._create_binding(task_id).name
    self._consumer.cancel_by_queue(queue_name)
class RPCBackend(base.Backend, AsyncBackendMixin):
"""Base class for the RPC result backend."""
# kombu classes used by this backend; ``Exchange`` is used by _create_exchange().
Exchange = kombu.Exchange
Producer = kombu.Producer
# Consumer class instantiated in __init__ as ``self.result_consumer``.
ResultConsumer = ResultConsumer
#: Exception raised when there are too many messages for a task id.
BacklogLimitExceeded = BacklogLimitExceeded
# Class-level default; __init__ sets the instance value via prepare_persistent().
persistent = False
supports_autoexpire = True
supports_native_join = True
# Default retry policy values for this backend.
retry_policy = {
'max_retries': 20,
'interval_start': 0,
'interval_step': 1,
'interval_max': 1,
}
class Consumer(kombu.Consumer):
"""Consumer that requires manual declaration of queues."""
auto_declare = False
class Queue(kombu.Queue):
"""Queue that never caches declaration."""
can_cache_declaration = False
def __init__(self, app, connection=None, exchange=None, exchange_type=None,
persistent=None, serializer=None, auto_delete=True, **kwargs):
super().__init__(app, **kwargs)
conf = self.app.conf
self._connection = connection
self._out_of_band = {}
self.persistent = self.prepare_persistent(persistent)
self.delivery_mode = 2 if self.persistent else 1
exchange = exchange or conf.result_exchange
exchange_type = exchange_type or conf.result_exchange_type
self.exchange = self._create_exchange(
exchange, exchange_type, self.delivery_mode,
)
self.serializer = serializer or conf.result_serializer
self.auto_delete = auto_delete
self.result_consumer = self.ResultConsumer(
self, self.app, self.accept,
self._pending_results, self._pending_messages,
)
if register_after_fork is not None:
register_after_fork(self, _on_after_fork_cleanup_backend)
def _after_fork(self):
# clear state for child processes.
self._pending_results.clear()
self.result_consumer._after_fork()
def _create_exchange(self, name, type='direct', delivery_mode=2):
# uses direct to queue routing (anon exchange).
return self.Exchange(None)
def _create_binding(self, task_id):
"""Create new binding for task with id."""
# RPC backend caches the binding, as one queue is used for all tasks.
return self.binding
def ensure_chords_allowed(self):
raise NotImplementedError(E_NO_CHORD_SUPPORT.strip())
def on_task_call(self, producer, task_id):
# Called every time a task is sent when using this backend.
# We declare the queue we receive replies on in advance of sending
# the message, but we skip this if running in the prefork pool
# (task_join_will_block), as we know the queue is already declared.
if not task_join_will_block():
maybe_declare(self.binding(producer.channel), retry=True)
def destination_for(self, task_id, request):
"""Get the destination for result by task id.
Returns:
Tuple[str, str]: tuple of ``(reply_to, correlation_id)``.
"""
# Backends didn't always receive the `request`, so we must still
# support old code that relies on current_task.
try:
request = request or current_task.request
except AttributeError:
raise RuntimeError(
f'RPC backend missing task request for {task_id!r}')
return request.reply_to, request.correlation_id or task_id
def on_reply_declare(self, task_id):
# Return value here is used as the `declare=` argument
# for Producer.publish.
# By default we don't have to declare anything when sending a result.
pass
def on_result_fulfilled(self, result):
# This usually cancels the queue after the result is received,
# but we don't have to cancel since we have one queue per process.
pass
def as_uri(self, include_password=True):
return 'rpc://'
def store_result(self, task_id, result, state,
traceback=None, request=None, **kwargs):
"""Send task return value and state."""
routing_key, correlation_id = self.destination_for(task_id, request)
if not routing_key:
return
with self.app.amqp.producer_pool.acquire(block=True) as producer:
producer.publish(
self._to_result(task_id, state, result, traceback, request),
exchange=self.exchange,
routing_key=routing_key,
correlation_id=correlation_id,
serializer=self.serializer,
retry=True, retry_policy=self.retry_policy,
declare=self.on_reply_declare(task_id),
delivery_mode=self.delivery_mode,
)
return result
def _to_result(self, task_id, state, result, traceback, request):
return {
'task_id': task_id,
'status': state,
'result': self.encode_result(result, state),
'traceback': traceback,
'children': self.current_task_children(request),
}
def on_out_of_band_result(self, task_id, message):
# Callback called when a reply for a task is received,
# but we have no idea what do do with it.
# Since the result is not pending, we put it in a separate
# buffer: probably it will become pending later.
if self.result_consumer:
self.result_consumer.on_out_of_band_result(message)
self._out_of_band[task_id] = message
def get_task_meta(self, task_id, backlog_limit=1000):
buffered = self._out_of_band.pop(task_id, None)
if buffered:
return self._set_cache_by_message(task_id, buffered)
# Polling and using basic_get
latest_by_id = {}
prev = None
for acc in self._slurp_from_queue(task_id, self.accept, backlog_limit):
tid = self._get_message_task_id(acc)
prev, latest_by_id[tid] = latest_by_id.get(tid), acc
if prev:
# backends aren't expected to keep history,
# so we delete everything except the most recent state.
prev.ack()
prev = None
latest = latest_by_id.pop(task_id, None)
for tid, msg in latest_by_id.items():
self.on_out_of_band_result(tid, msg)
if latest:
latest.requeue()
return self._set_cache_by_message(task_id, latest)
else:
# no new state, use previous
try:
return self._cache[task_id]
except KeyError:
# result probably pending.
return {'status': states.PENDING, 'result': None}
poll = get_task_meta # XXX compat
def _set_cache_by_message(self, task_id, message):
payload = self._cache[task_id] = self.meta_from_decoded(
message.payload)
return payload
def _slurp_from_queue(self, task_id, accept,
limit=1000, no_ack=False):
with self.app.pool.acquire_channel(block=True) as (_, channel):
binding = self._create_binding(task_id)(channel)
binding.declare()
for _ in range(limit):
msg = binding.get(accept=accept, no_ack=no_ack)
if not msg:
break
yield msg
else:
raise self.BacklogLimitExceeded(task_id)
def _get_message_task_id(self, message):
try:
# try property first so we don't have to deserialize
# the payload.
return message.properties['correlation_id']
except (AttributeError, KeyError):
# message sent by old Celery version, need to deserialize.
return message.payload['task_id']
def revive(self, channel):
pass
def reload_task_result(self, task_id):
raise NotImplementedError(
'reload_task_result is not supported by this backend.')
def reload_group_result(self, task_id):
"""Reload group result, even if it has been previously fetched."""
raise NotImplementedError(
'reload_group_result is not supported by this backend.')
def save_group(self, group_id, result):
raise NotImplementedError(
'save_group is not supported by this backend.')
def restore_group(self, group_id, cache=True):
raise NotImplementedError(
'restore_group is not supported by this backend.')
def delete_group(self, group_id):
raise NotImplementedError(
'delete_group is not supported by this backend.')
def __reduce__(self, args=(), kwargs=None):
kwargs = {} if not kwargs else kwargs
return super().__reduce__(args, dict(
kwargs,
connection=self._connection,
exchange=self.exchange.name,
exchange_type=self.exchange.type,
persistent=self.persistent,
serializer=self.serializer,
auto_delete=self.auto_delete,
expires=self.expires,
))
@property
def binding(self):
return self.Queue(
self.oid, self.exchange, self.oid,
durable=False,
auto_delete=True,
expires=self.expires,
)
@cached_property
def oid(self):
# cached here is the app thread OID: name of queue we receive results on.
return self.app.thread_oid

View File

@@ -0,0 +1,87 @@
"""s3 result store backend."""
from kombu.utils.encoding import bytes_to_str
from celery.exceptions import ImproperlyConfigured
from .base import KeyValueStoreBackend
try:
import boto3
import botocore
except ImportError:
boto3 = None
botocore = None
__all__ = ('S3Backend',)
class S3Backend(KeyValueStoreBackend):
    """An S3 task result store.

    Configuration is read from the app settings ``s3_endpoint_url``,
    ``s3_region``, ``s3_access_key_id``, ``s3_secret_access_key``,
    ``s3_bucket`` and ``s3_base_path``.

    Raises:
        celery.exceptions.ImproperlyConfigured:
            if module :pypi:`boto3` is not available,
            if the ``s3_bucket`` setting is not set,
            or if the boto3 session has no credentials.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if not boto3 or not botocore:
            # The space after "boto3" matters: adjacent literals are
            # concatenated verbatim.
            raise ImproperlyConfigured('You must install boto3 '
                                       'to use s3 backend')
        conf = self.app.conf

        self.endpoint_url = conf.get('s3_endpoint_url', None)
        self.aws_region = conf.get('s3_region', None)

        self.aws_access_key_id = conf.get('s3_access_key_id', None)
        self.aws_secret_access_key = conf.get('s3_secret_access_key', None)

        self.bucket_name = conf.get('s3_bucket', None)
        if not self.bucket_name:
            raise ImproperlyConfigured('Missing bucket name')

        self.base_path = conf.get('s3_base_path', None)

        self._s3_resource = self._connect_to_s3()

    def _get_s3_object(self, key):
        """Return the S3 object for *key*, prefixed by the base path if set."""
        key_bucket_path = self.base_path + key if self.base_path else key
        return self._s3_resource.Object(self.bucket_name, key_bucket_path)

    def get(self, key):
        """Return the value stored under *key*.

        Returns:
            The raw bytes when ``content_encoding`` is ``'binary'``,
            the UTF-8 decoded text otherwise, or ``None`` when S3
            answers with error code ``"404"``.

        Raises:
            botocore.exceptions.ClientError: for any other client error.
        """
        key = bytes_to_str(key)
        s3_object = self._get_s3_object(key)
        try:
            s3_object.load()
            data = s3_object.get()['Body'].read()
            return data if self.content_encoding == 'binary' else data.decode('utf-8')
        except botocore.exceptions.ClientError as error:
            if error.response['Error']['Code'] == "404":
                return None
            # Bare raise keeps the original traceback intact.
            raise

    def set(self, key, value):
        """Store *value* as the body of the object for *key*."""
        key = bytes_to_str(key)
        s3_object = self._get_s3_object(key)
        s3_object.put(Body=value)

    def delete(self, key):
        """Delete the object stored for *key*."""
        key = bytes_to_str(key)
        s3_object = self._get_s3_object(key)
        s3_object.delete()

    def _connect_to_s3(self):
        """Create a boto3 session and return its ``s3`` resource.

        Raises:
            celery.exceptions.ImproperlyConfigured: if the session
                reports no credentials.
        """
        session = boto3.Session(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.aws_region
        )
        if session.get_credentials() is None:
            raise ImproperlyConfigured('Missing aws s3 creds')
        return session.resource('s3', endpoint_url=self.endpoint_url)

View File

@@ -0,0 +1,736 @@
"""The periodic task scheduler."""
import copy
import errno
import heapq
import os
import shelve
import sys
import time
import traceback
from calendar import timegm
from collections import namedtuple
from functools import total_ordering
from threading import Event, Thread
from billiard import ensure_multiprocessing
from billiard.common import reset_signals
from billiard.context import Process
from kombu.utils.functional import maybe_evaluate, reprcall
from kombu.utils.objects import cached_property
from . import __version__, platforms, signals
from .exceptions import reraise
from .schedules import crontab, maybe_schedule
from .utils.functional import is_numeric_value
from .utils.imports import load_extension_class_names, symbol_by_name
from .utils.log import get_logger, iter_open_logger_fds
from .utils.time import humanize_seconds, maybe_make_aware
# Names exported as the public API of this module.
__all__ = (
'SchedulingError', 'ScheduleEntry', 'Scheduler',
'PersistentScheduler', 'Service', 'EmbeddedService',
)
# Item stored in the scheduler heap: ``(time, priority, entry)``.
event_t = namedtuple('event_t', ('time', 'priority', 'entry'))
# Module logger and shortcuts to its per-level logging methods.
logger = get_logger(__name__)
debug, info, error, warning = (logger.debug, logger.info,
logger.error, logger.warning)
# Default for ``Scheduler.max_interval``, in seconds.
DEFAULT_MAX_INTERVAL = 300 # 5 minutes
class SchedulingError(Exception):
    """Raised when a scheduled task could not be applied."""
class BeatLazyFunc:
    """Deferred call that can be used as an argument in 'beat_schedule'.

    The wrapped function, together with the positional and keyword
    arguments captured at construction, is only invoked when the
    instance is called (or :meth:`delay` is used).

    Example:

        beat_schedule = {
            'test-every-5-minutes': {
                'task': 'test',
                'schedule': 300,
                'kwargs': {
                    "current": BeatLazyFunc(datetime.datetime.now)
                }
            }
        }

    """

    def __init__(self, func, *args, **kwargs):
        self._func = func
        self._func_params = {"args": args, "kwargs": kwargs}

    def __call__(self):
        """Evaluate the wrapped function; same as :meth:`delay`."""
        return self.delay()

    def delay(self):
        """Call the wrapped function with the captured arguments."""
        params = self._func_params
        positional, named = params["args"], params["kwargs"]
        return self._func(*positional, **named)
@total_ordering
class ScheduleEntry:
    """An entry in the scheduler.

    Arguments:
        name (str): see :attr:`name`.
        schedule (~celery.schedules.schedule): see :attr:`schedule`.
        args (Tuple): see :attr:`args`.
        kwargs (Dict): see :attr:`kwargs`.
        options (Dict): see :attr:`options`.
        last_run_at (~datetime.datetime): see :attr:`last_run_at`.
        total_run_count (int): see :attr:`total_run_count`.
        relative (bool): Is the time relative to when the server starts?
    """

    #: The task name
    name = None

    #: The schedule (:class:`~celery.schedules.schedule`)
    schedule = None

    #: Positional arguments to apply.
    args = None

    #: Keyword arguments to apply.
    kwargs = None

    #: Task execution options.
    options = None

    #: The time and date of when this task was last scheduled.
    last_run_at = None

    #: Total number of times this task has been scheduled.
    total_run_count = 0

    def __init__(self, name=None, task=None, last_run_at=None,
                 total_run_count=None, schedule=None, args=(), kwargs=None,
                 options=None, relative=False, app=None):
        self.app = app
        self.name = name
        self.task = task
        self.args = args
        self.kwargs = kwargs if kwargs else {}
        self.options = options if options else {}
        self.schedule = maybe_schedule(schedule, relative, app=self.app)
        # Needs ``self.schedule``: default_now() consults it first.
        self.last_run_at = last_run_at or self.default_now()
        self.total_run_count = total_run_count or 0

    def default_now(self):
        """Return "now" from the schedule, or from the app without one."""
        return self.schedule.now() if self.schedule else self.app.now()
    _default_now = default_now  # compat

    def _next_instance(self, last_run_at=None):
        """Return new instance, with date and count fields updated."""
        return self.__class__(**dict(
            self,
            last_run_at=last_run_at or self.default_now(),
            total_run_count=self.total_run_count + 1,
        ))
    __next__ = next = _next_instance  # for 2to3

    def __reduce__(self):
        return self.__class__, (
            self.name, self.task, self.last_run_at, self.total_run_count,
            self.schedule, self.args, self.kwargs, self.options,
        )

    def update(self, other):
        """Update values from another entry.

        Will only update "editable" fields:
            ``task``, ``schedule``, ``args``, ``kwargs``, ``options``.
        """
        self.__dict__.update({
            'task': other.task, 'schedule': other.schedule,
            'args': other.args, 'kwargs': other.kwargs,
            'options': other.options,
        })

    def is_due(self):
        """See :meth:`~celery.schedules.schedule.is_due`."""
        return self.schedule.is_due(self.last_run_at)

    def __iter__(self):
        # Yields ``(attribute, value)`` pairs so ``dict(entry)`` works;
        # _next_instance() relies on this.
        return iter(vars(self).items())

    def __repr__(self):
        # The closing '>' balances the opening '<'.
        return '<{name}: {0.name} {call} {0.schedule}>'.format(
            self,
            call=reprcall(self.task, self.args or (), self.kwargs or {}),
            name=type(self).__name__,
        )

    def __lt__(self, other):
        if isinstance(other, ScheduleEntry):
            # How the object is ordered doesn't really matter, as
            # in the scheduler heap, the order is decided by the
            # preceding members of the tuple ``(time, priority, entry)``.
            #
            # If all that's left to order on is the entry then it can
            # just as well be random.
            return id(self) < id(other)
        return NotImplemented

    def editable_fields_equal(self, other):
        """Return True if all "editable" fields equal those of *other*."""
        for attr in ('task', 'args', 'kwargs', 'options', 'schedule'):
            if getattr(self, attr) != getattr(other, attr):
                return False
        return True

    def __eq__(self, other):
        """Test schedule entries equality.

        Will only compare "editable" fields:
        ``task``, ``schedule``, ``args``, ``kwargs``, ``options``.

        Returns ``NotImplemented`` for operands that are not schedule
        entries (mirroring :meth:`__lt__`), so ``entry == None`` is
        ``False`` instead of raising :exc:`AttributeError`.
        """
        if not isinstance(other, ScheduleEntry):
            return NotImplemented
        return self.editable_fields_equal(other)
def _evaluate_entry_args(entry_args):
if not entry_args:
return []
return [
v() if isinstance(v, BeatLazyFunc) else v
for v in entry_args
]
def _evaluate_entry_kwargs(entry_kwargs):
if not entry_kwargs:
return {}
return {
k: v() if isinstance(v, BeatLazyFunc) else v
for k, v in entry_kwargs.items()
}
class Scheduler:
    """Scheduler for periodic tasks.

    The :program:`celery beat` program may instantiate this class
    multiple times for introspection purposes, but then with the
    ``lazy`` argument set.  It's important for subclasses to
    be idempotent when this argument is set.

    Arguments:
        schedule (~celery.schedules.schedule): see :attr:`schedule`.
        max_interval (int): see :attr:`max_interval`.
        lazy (bool): Don't set up the schedule.
    """

    Entry = ScheduleEntry

    #: The schedule dict/shelve.
    schedule = None

    #: Maximum time to sleep between re-checking the schedule.
    max_interval = DEFAULT_MAX_INTERVAL

    #: How often to sync the schedule (3 minutes by default)
    sync_every = 3 * 60

    #: How many tasks can be called before a sync is forced.
    sync_every_tasks = None

    _last_sync = None
    _tasks_since_sync = 0

    logger = logger  # compat

    def __init__(self, app, schedule=None, max_interval=None,
                 Producer=None, lazy=False, sync_every_tasks=None, **kwargs):
        self.app = app
        initial = {} if schedule is None else schedule
        self.data = maybe_evaluate(initial)
        # First truthy of: argument, app setting, class default.
        self.max_interval = (
            max_interval
            or app.conf.beat_max_loop_interval
            or self.max_interval
        )
        self.Producer = Producer or app.amqp.Producer
        self._heap = None
        self.old_schedulers = None
        if sync_every_tasks is None:
            sync_every_tasks = app.conf.beat_sync_every
        self.sync_every_tasks = sync_every_tasks
        if not lazy:
            self.setup_schedule()

    def install_default_entries(self, data):
        """Add the ``celery.backend_cleanup`` entry when it is needed.

        It is installed only if results expire, the result backend does
        not expire them by itself, and *data* has no such entry yet.
        """
        entries = {}
        wants_cleanup = (
            self.app.conf.result_expires
            and not self.app.backend.supports_autoexpire
            and 'celery.backend_cleanup' not in data
        )
        if wants_cleanup:
            entries['celery.backend_cleanup'] = {
                'task': 'celery.backend_cleanup',
                'schedule': crontab('0', '4', '*'),
                'options': {'expires': 12 * 3600}}
        self.update_from_dict(entries)

    def apply_entry(self, entry, producer=None):
        """Send the task of a due *entry*, logging instead of raising."""
        info('Scheduler: Sending due task %s (%s)', entry.name, entry.task)
        try:
            result = self.apply_async(entry, producer=producer, advance=False)
        except Exception as exc:  # pylint: disable=broad-except
            error('Message Error: %s\n%s',
                  exc, traceback.format_stack(), exc_info=True)
            return
        if result and hasattr(result, 'id'):
            debug('%s sent. id->%s', entry.task, result.id)
        else:
            debug('%s sent.', entry.task)

    def adjust(self, n, drift=-0.010):
        """Return *n* shifted by *drift* when positive, else *n* as is."""
        return n + drift if n and n > 0 else n

    def is_due(self, entry):
        """Return the result of ``entry.is_due()``."""
        return entry.is_due()

    def _when(self, entry, next_time_to_run, mktime=timegm):
        """Return a utc timestamp, make sure heapq in correct order."""
        now = maybe_make_aware(entry.default_now())
        timestamp = mktime(now.utctimetuple()) + now.microsecond / 1e6
        return timestamp + (self.adjust(next_time_to_run) or 0)

    def populate_heap(self, event_t=event_t, heapify=heapq.heapify):
        """Populate the heap with the data contained in the schedule."""
        priority = 5
        heap = self._heap = []
        for entry in self.schedule.values():
            is_due, next_call_delay = entry.is_due()
            delay = 0 if is_due else next_call_delay
            heap.append(
                event_t(self._when(entry, delay) or 0, priority, entry))
        heapify(heap)

    # pylint: disable=redefined-outer-name
    def tick(self, event_t=event_t, min=min, heappop=heapq.heappop,
             heappush=heapq.heappush):
        """Run a tick - one iteration of the scheduler.

        Executes one due task per call.

        Returns:
            float: preferred delay in seconds for next call.
        """
        max_interval = self.max_interval

        # (Re)build the heap on first use and when the schedule changed.
        heap_is_stale = (
            self._heap is None or
            not self.schedules_equal(self.old_schedulers, self.schedule)
        )
        if heap_is_stale:
            self.old_schedulers = copy.copy(self.schedule)
            self.populate_heap()

        heap = self._heap
        if not heap:
            return max_interval

        head = heap[0]
        entry = head[2]
        is_due, next_time_to_run = self.is_due(entry)

        if not is_due:
            delay = self.adjust(next_time_to_run)
            if not is_numeric_value(delay):
                delay = max_interval
            return min(delay, max_interval)

        popped = heappop(heap)
        if popped is not head:
            # The heap top is no longer the event inspected above:
            # put it back untouched.
            heappush(heap, popped)
            return min(popped[0], max_interval)

        next_entry = self.reserve(entry)
        self.apply_entry(entry, producer=self.producer)
        heappush(heap, event_t(self._when(next_entry, next_time_to_run),
                               head[1], next_entry))
        return 0

    def schedules_equal(self, old_schedules, new_schedules):
        """Return True if both schedules hold equal entries per name."""
        if old_schedules is None or new_schedules is None:
            # Equal only when both are missing.
            return old_schedules is new_schedules
        if set(old_schedules.keys()) != set(new_schedules.keys()):
            return False
        for name, old_entry in old_schedules.items():
            new_entry = new_schedules.get(name)
            if not new_entry or new_entry != old_entry:
                return False
        return True

    def should_sync(self):
        """Return a truthy value when the schedule should be synced.

        That is when it was never synced, when more than
        :attr:`sync_every` seconds passed since the last sync, or when
        :attr:`sync_every_tasks` is set and that many tasks were sent.
        """
        if not self._last_sync:
            return True
        if (time.monotonic() - self._last_sync) > self.sync_every:
            return True
        return (self.sync_every_tasks and
                self._tasks_since_sync >= self.sync_every_tasks)

    def reserve(self, entry):
        """Store the successor of *entry* in the schedule and return it."""
        successor = next(entry)
        self.schedule[entry.name] = successor
        return successor

    def apply_async(self, entry, producer=None, advance=True, **kwargs):
        """Send the task described by *entry*.

        Raises:
            SchedulingError: if evaluating the arguments or sending the
                task raised any exception.
        """
        # Time-stamps and run counts are updated before sending, so they
        # are in place even if sending raises (doesn't schedule forever).
        if advance:
            entry = self.reserve(entry)
        task = self.app.tasks.get(entry.task)

        try:
            call_args = _evaluate_entry_args(entry.args)
            call_kwargs = _evaluate_entry_kwargs(entry.kwargs)
            if task:
                return task.apply_async(call_args, call_kwargs,
                                        producer=producer,
                                        **entry.options)
            return self.send_task(entry.task, call_args, call_kwargs,
                                  producer=producer,
                                  **entry.options)
        except Exception as exc:  # pylint: disable=broad-except
            reraise(SchedulingError, SchedulingError(
                "Couldn't apply scheduled task {0.name}: {exc}".format(
                    entry, exc=exc)), sys.exc_info()[2])
        finally:
            self._tasks_since_sync += 1
            if self.should_sync():
                self._do_sync()

    def send_task(self, *args, **kwargs):
        """Forward to ``app.send_task``."""
        return self.app.send_task(*args, **kwargs)

    def setup_schedule(self):
        """Install default entries and merge in ``beat_schedule``."""
        self.install_default_entries(self.data)
        self.merge_inplace(self.app.conf.beat_schedule)

    def _do_sync(self):
        """Sync, then reset the sync bookkeeping even if syncing failed."""
        try:
            debug('beat: Synchronizing schedule...')
            self.sync()
        finally:
            self._last_sync = time.monotonic()
            self._tasks_since_sync = 0

    def sync(self):
        """Do nothing here; subclasses may persist the schedule."""
        pass

    def close(self):
        """Sync the schedule."""
        self.sync()

    def add(self, **kwargs):
        """Create an entry from *kwargs*, store it by name and return it."""
        entry = self.Entry(app=self.app, **kwargs)
        self.schedule[entry.name] = entry
        return entry

    def _maybe_entry(self, name, entry):
        """Return *entry* as an :attr:`Entry` instance bound to this app."""
        if not isinstance(entry, self.Entry):
            return self.Entry(**dict(entry, name=name, app=self.app))
        entry.app = self.app
        return entry

    def update_from_dict(self, dict_):
        """Update the schedule with the entries described by *dict_*."""
        converted = {}
        for name, entry in dict_.items():
            converted[name] = self._maybe_entry(name, entry)
        self.schedule.update(converted)

    def merge_inplace(self, b):
        """Make the schedule reflect the mapping *b*.

        Names missing from *b* are dropped; entries already present are
        updated in place, new ones are added.
        """
        schedule = self.schedule
        current_names, wanted_names = set(schedule), set(b)

        # Remove items from disk not in the schedule anymore.
        # (Names found only in ``b`` are no-ops for ``pop``.)
        for stale in current_names ^ wanted_names:
            schedule.pop(stale, None)

        # Update and add new items in the schedule
        for name in wanted_names:
            entry = self.Entry(**dict(b[name], name=name, app=self.app))
            if schedule.get(name):
                schedule[name].update(entry)
            else:
                schedule[name] = entry

    def _ensure_connected(self):
        """Return the connection once it could be established."""
        # Called for each retry while the connection can't be established.
        def _error_handler(exc, interval):
            error('beat: Connection error: %s. '
                  'Trying again in %s seconds...', exc, interval)

        max_retries = self.app.conf.broker_connection_max_retries
        return self.connection.ensure_connection(_error_handler, max_retries)

    def get_schedule(self):
        return self.data

    def set_schedule(self, schedule):
        self.data = schedule
    schedule = property(get_schedule, set_schedule)

    @cached_property
    def connection(self):
        """Connection obtained from ``app.connection_for_write()``."""
        return self.app.connection_for_write()

    @cached_property
    def producer(self):
        """Producer on the ensured connection, without auto-declare."""
        return self.Producer(self._ensure_connected(), auto_declare=False)

    @property
    def info(self):
        """Extra description of the scheduler; empty here."""
        return ''
class PersistentScheduler(Scheduler):
    """Scheduler backed by :mod:`shelve` database."""

    persistence = shelve
    #: Filename suffixes tried when removing the schedule database.
    known_suffixes = ('', '.db', '.dat', '.bak', '.dir')

    #: The opened store; ``None`` until :meth:`setup_schedule` ran.
    _store = None

    def __init__(self, *args, **kwargs):
        # Must be set before the parent constructor, which may call
        # setup_schedule().
        self.schedule_filename = kwargs.get('schedule_filename')
        super().__init__(*args, **kwargs)

    def _remove_db(self):
        """Remove the schedule file under every known suffix."""
        for suffix in self.known_suffixes:
            with platforms.ignore_errno(errno.ENOENT):
                os.remove(self.schedule_filename + suffix)

    def _open_schedule(self):
        """Open the schedule file with write-back caching enabled."""
        return self.persistence.open(self.schedule_filename, writeback=True)

    def _destroy_open_corrupted_schedule(self, exc):
        """Log *exc*, remove the schedule files and open a fresh store."""
        error('Removing corrupted schedule file %r: %r',
              self.schedule_filename, exc, exc_info=True)
        self._remove_db()
        return self._open_schedule()

    def setup_schedule(self):
        """Open the store and bring it in line with the configuration.

        The store is recreated if it cannot be opened or read, cleared if
        the timezone or UTC setting changed since it was written, and
        then merged with ``beat_schedule`` and the default entries.
        """
        try:
            self._store = self._open_schedule()
            # In some cases there may be different errors from a storage
            # backend for corrupted files.  Example - DBPageNotFoundError
            # exception from bsddb.  In such case the file will be
            # successfully opened but the error will be raised on first key
            # retrieving.
            self._store.keys()
        except Exception as exc:  # pylint: disable=broad-except
            self._store = self._destroy_open_corrupted_schedule(exc)

        self._create_schedule()

        tz = self.app.conf.timezone
        stored_tz = self._store.get('tz')
        if stored_tz is not None and stored_tz != tz:
            warning('Reset: Timezone changed from %r to %r', stored_tz, tz)
            self._store.clear()   # Timezone changed, reset db!
        utc = self.app.conf.enable_utc
        stored_utc = self._store.get('utc_enabled')
        if stored_utc is not None and stored_utc != utc:
            choices = {True: 'enabled', False: 'disabled'}
            warning('Reset: UTC changed from %s to %s',
                    choices[stored_utc], choices[utc])
            self._store.clear()   # UTC setting changed, reset db!
        entries = self._store.setdefault('entries', {})
        self.merge_inplace(self.app.conf.beat_schedule)
        self.install_default_entries(self.schedule)
        self._store.update({
            '__version__': __version__,
            'tz': tz,
            'utc_enabled': utc,
        })
        self.sync()
        debug('Current schedule:\n' + '\n'.join(
            repr(entry) for entry in entries.values()))

    def _create_schedule(self):
        """Ensure the store has ``entries`` and the expected meta fields.

        Makes at most two attempts: the second one runs only after the
        store had to be recreated because ``entries`` could not be set.
        """
        for _ in (1, 2):
            try:
                self._store['entries']
            except KeyError:
                # new schedule db
                try:
                    self._store['entries'] = {}
                except KeyError as exc:
                    self._store = self._destroy_open_corrupted_schedule(exc)
                    continue
            else:
                if '__version__' not in self._store:
                    warning('DB Reset: Account for new __version__ field')
                    self._store.clear()   # remove schedule at 2.2.2 upgrade.
                elif 'tz' not in self._store:
                    warning('DB Reset: Account for new tz field')
                    self._store.clear()   # remove schedule at 3.0.8 upgrade
                elif 'utc_enabled' not in self._store:
                    warning('DB Reset: Account for new utc_enabled field')
                    self._store.clear()   # remove schedule at 3.0.9 upgrade
            break

    def get_schedule(self):
        return self._store['entries']

    def set_schedule(self, schedule):
        self._store['entries'] = schedule
    schedule = property(get_schedule, set_schedule)

    def sync(self):
        """Flush the store, if one is open."""
        if self._store is not None:
            self._store.sync()

    def close(self):
        """Sync and close the store.

        Like :meth:`sync`, this is a no-op when no store was ever opened
        (for example a scheduler created with ``lazy=True``).
        """
        self.sync()
        if self._store is not None:
            self._store.close()

    @property
    def info(self):
        """Description line naming the schedule database file."""
        return f'    . db -> {self.schedule_filename}'
class Service:
    """Celery periodic task service."""

    scheduler_cls = PersistentScheduler

    def __init__(self, app, max_interval=None, schedule_filename=None,
                 scheduler_cls=None):
        self.app = app
        self.max_interval = (max_interval or
                             app.conf.beat_max_loop_interval)
        self.scheduler_cls = scheduler_cls or self.scheduler_cls
        self.schedule_filename = (
            schedule_filename or app.conf.beat_schedule_filename)

        # Set to request shutdown / once the service has fully stopped.
        self._is_shutdown = Event()
        self._is_stopped = Event()

    def __reduce__(self):
        # Argument order must match __init__, which takes ``app`` first.
        return self.__class__, (self.app, self.max_interval,
                                self.schedule_filename, self.scheduler_cls)

    def start(self, embedded_process=False):
        """Run the scheduler loop until shutdown is requested.

        Arguments:
            embedded_process (bool): When true, additionally send the
                ``beat_embedded_init`` signal and set the process title.
        """
        info('beat: Starting...')
        debug('beat: Ticking with max interval->%s',
              humanize_seconds(self.scheduler.max_interval))

        signals.beat_init.send(sender=self)
        if embedded_process:
            signals.beat_embedded_init.send(sender=self)
            platforms.set_process_title('celery beat')

        try:
            while not self._is_shutdown.is_set():
                interval = self.scheduler.tick()
                if interval and interval > 0.0:
                    debug('beat: Waking up %s.',
                          humanize_seconds(interval, prefix='in '))
                    time.sleep(interval)
                    if self.scheduler.should_sync():
                        self.scheduler._do_sync()
        except (KeyboardInterrupt, SystemExit):
            self._is_shutdown.set()
        finally:
            self.sync()

    def sync(self):
        """Close the scheduler and mark the service as stopped."""
        self.scheduler.close()
        self._is_stopped.set()

    def stop(self, wait=False):
        """Request shutdown.

        Arguments:
            wait (bool): Block until the service has stopped.
        """
        info('beat: Shutting down...')
        self._is_shutdown.set()
        if wait:
            self._is_stopped.wait()  # block until shutdown done.

    def get_scheduler(self, lazy=False,
                      extension_namespace='celery.beat_schedulers'):
        """Instantiate the configured scheduler class.

        :attr:`scheduler_cls` is resolved with :func:`symbol_by_name`,
        using the class names found in *extension_namespace* as aliases.
        """
        filename = self.schedule_filename
        aliases = dict(load_extension_class_names(extension_namespace))
        return symbol_by_name(self.scheduler_cls, aliases=aliases)(
            app=self.app,
            schedule_filename=filename,
            max_interval=self.max_interval,
            lazy=lazy,
        )

    @cached_property
    def scheduler(self):
        """The scheduler instance, created on first access."""
        return self.get_scheduler()
class _Threaded(Thread):
    """Embedded task scheduler using threading."""

    def __init__(self, app, **kwargs):
        """Create a daemon thread named 'Beat' wrapping a :class:`Service`.

        Extra keyword arguments are passed on to :class:`Service`.
        """
        super().__init__()
        self.app = app
        self.service = Service(app, **kwargs)
        self.daemon = True
        self.name = 'Beat'

    def run(self):
        """Make the app current in this thread, then run the service."""
        self.app.set_current()
        self.service.start()

    def stop(self):
        """Stop the service and wait until it has shut down."""
        self.service.stop(wait=True)
# ``_Process`` only exists where multiprocessing is available;
# otherwise it is ``None`` and EmbeddedService falls back to threads.
try:
    ensure_multiprocessing()
except NotImplementedError:     # pragma: no cover
    _Process = None
else:
    class _Process(Process):
        """Embedded task scheduler running in a separate process."""

        def __init__(self, app, **kwargs):
            """Create a process named 'Beat' wrapping a :class:`Service`."""
            super().__init__()
            self.app = app
            self.service = Service(app, **kwargs)
            self.name = 'Beat'

        def run(self):
            """Prepare the child process, then run the service."""
            reset_signals(full=False)
            # File descriptors that are passed to close_open_fds().
            fds = [sys.__stdin__, sys.__stdout__, sys.__stderr__]
            fds = fds + list(iter_open_logger_fds())
            platforms.close_open_fds(fds)
            self.app.set_default()
            self.app.set_current()
            self.service.start(embedded_process=True)

        def stop(self):
            """Ask the service to stop, then terminate the process."""
            self.service.stop()
            self.terminate()
def EmbeddedService(app, max_interval=None, **kwargs):
    """Return embedded clock service.

    Arguments:
        thread (bool): Run threaded instead of as a separate process.
            Uses :mod:`multiprocessing` by default, if available.
    """
    use_thread = kwargs.pop('thread', False) or _Process is None
    if use_thread:
        # A short max interval lets the thread be stopped within
        # reasonable time, so the caller's ``max_interval`` is not used.
        service_cls, interval = _Threaded, 1
    else:
        service_cls, interval = _Process, max_interval
    return service_cls(app, max_interval=interval, **kwargs)

Some files were not shown because too many files have changed in this diff Show More