Commit 6b247e5b9f (parent 306b20e24a)
Author: Iliyan Angelov
Date: 2025-09-19 11:58:53 +03:00

11423 changed files with 1500615 additions and 778 deletions

celery/concurrency/__init__.py

@@ -0,0 +1,48 @@
"""Pool implementation abstract factory, and alias definitions."""
import os
# Import from kombu directly as it's used
# early in the import stage, where celery.utils loads
# too much (e.g., for eventlet patching)
from kombu.utils.imports import symbol_by_name
__all__ = ('get_implementation', 'get_available_pool_names',)
ALIASES = {
'prefork': 'celery.concurrency.prefork:TaskPool',
'eventlet': 'celery.concurrency.eventlet:TaskPool',
'gevent': 'celery.concurrency.gevent:TaskPool',
'solo': 'celery.concurrency.solo:TaskPool',
'processes': 'celery.concurrency.prefork:TaskPool', # XXX compat alias
}
try:
import concurrent.futures # noqa
except ImportError:
pass
else:
ALIASES['threads'] = 'celery.concurrency.thread:TaskPool'
#
# Allow for an out-of-tree worker pool implementation. This is used as follows:
#
# - Set the environment variable CELERY_CUSTOM_WORKER_POOL to the name of
# an implementation of :class:`celery.concurrency.base.BasePool` in the
# standard Celery format of "package:class".
# - Select this pool using '--pool custom'.
#
try:
    custom = os.environ['CELERY_CUSTOM_WORKER_POOL']
except KeyError:
pass
else:
ALIASES['custom'] = custom
def get_implementation(cls):
"""Return pool implementation by name."""
return symbol_by_name(cls, ALIASES)
def get_available_pool_names():
"""Return all available pool type names."""
return tuple(ALIASES.keys())
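
A quick usage sketch for the factory above: names are resolved through ALIASES first, and anything else is treated as a Celery-style "package:class" path by symbol_by_name. The 'myapp.pools:CustomPool' path below is hypothetical, shown only for illustration.

from celery.concurrency import get_available_pool_names, get_implementation

# Alias lookup: resolves via ALIASES to celery.concurrency.prefork:TaskPool.
prefork_pool = get_implementation('prefork')

# Unknown names fall through to a direct "package:class" import
# (hypothetical path, for illustration only):
# custom_pool = get_implementation('myapp.pools:CustomPool')

print(get_available_pool_names())
# e.g. ('prefork', 'eventlet', 'gevent', 'solo', 'processes', 'threads')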

celery/concurrency/asynpool.py (file diff suppressed because it is too large)

celery/concurrency/base.py

@@ -0,0 +1,180 @@
"""Base Execution Pool."""
import logging
import os
import sys
import time
from typing import Any, Dict
from billiard.einfo import ExceptionInfo
from billiard.exceptions import WorkerLostError
from kombu.utils.encoding import safe_repr
from celery.exceptions import WorkerShutdown, WorkerTerminate, reraise
from celery.utils import timer2
from celery.utils.log import get_logger
from celery.utils.text import truncate
__all__ = ('BasePool', 'apply_target')
logger = get_logger('celery.pool')
def apply_target(target, args=(), kwargs=None, callback=None,
accept_callback=None, pid=None, getpid=os.getpid,
propagate=(), monotonic=time.monotonic, **_):
"""Apply function within pool context."""
kwargs = {} if not kwargs else kwargs
if accept_callback:
accept_callback(pid or getpid(), monotonic())
try:
ret = target(*args, **kwargs)
except propagate:
raise
except Exception:
raise
except (WorkerShutdown, WorkerTerminate):
raise
except BaseException as exc:
try:
reraise(WorkerLostError, WorkerLostError(repr(exc)),
sys.exc_info()[2])
except WorkerLostError:
callback(ExceptionInfo())
else:
callback(ret)
class BasePool:
"""Task pool."""
RUN = 0x1
CLOSE = 0x2
TERMINATE = 0x3
Timer = timer2.Timer
#: set to true if the pool can be shutdown from within
#: a signal handler.
signal_safe = True
#: set to true if pool uses greenlets.
is_green = False
_state = None
_pool = None
_does_debug = True
#: only used by multiprocessing pool
uses_semaphore = False
task_join_will_block = True
body_can_be_buffer = False
def __init__(self, limit=None, putlocks=True, forking_enable=True,
callbacks_propagate=(), app=None, **options):
self.limit = limit
self.putlocks = putlocks
self.options = options
self.forking_enable = forking_enable
self.callbacks_propagate = callbacks_propagate
self.app = app
def on_start(self):
pass
def did_start_ok(self):
return True
def flush(self):
pass
def on_stop(self):
pass
def register_with_event_loop(self, loop):
pass
def on_apply(self, *args, **kwargs):
pass
def on_terminate(self):
pass
def on_soft_timeout(self, job):
pass
def on_hard_timeout(self, job):
pass
def maintain_pool(self, *args, **kwargs):
pass
    def terminate_job(self, pid, signal=None):
        raise NotImplementedError(
            f'{type(self)} does not implement terminate_job')
def restart(self):
raise NotImplementedError(
f'{type(self)} does not implement restart')
def stop(self):
self.on_stop()
self._state = self.TERMINATE
def terminate(self):
self._state = self.TERMINATE
self.on_terminate()
def start(self):
self._does_debug = logger.isEnabledFor(logging.DEBUG)
self.on_start()
self._state = self.RUN
def close(self):
self._state = self.CLOSE
self.on_close()
def on_close(self):
pass
def apply_async(self, target, args=None, kwargs=None, **options):
"""Equivalent of the :func:`apply` built-in function.
Callbacks should optimally return as soon as possible since
otherwise the thread which handles the result will get blocked.
"""
kwargs = {} if not kwargs else kwargs
args = [] if not args else args
if self._does_debug:
logger.debug('TaskPool: Apply %s (args:%s kwargs:%s)',
target, truncate(safe_repr(args), 1024),
truncate(safe_repr(kwargs), 1024))
return self.on_apply(target, args, kwargs,
waitforslot=self.putlocks,
callbacks_propagate=self.callbacks_propagate,
**options)
def _get_info(self) -> Dict[str, Any]:
"""
Return configuration and statistics information. Subclasses should
augment the data as required.
:return: The returned value must be JSON-friendly.
"""
return {
'implementation': self.__class__.__module__ + ':' + self.__class__.__name__,
'max-concurrency': self.limit,
}
@property
def info(self):
return self._get_info()
@property
def active(self):
return self._state == self.RUN
@property
def num_processes(self):
return self.limit
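
To make the lifecycle contract concrete, here is a minimal hypothetical subclass (InlinePool is not part of this commit): start() runs on_start() and flips the state to RUN, apply_async() funnels every job into on_apply(), and stop() runs on_stop() before marking TERMINATE.

from celery.concurrency.base import BasePool, apply_target

class InlinePool(BasePool):
    # Run jobs inline, the same shortcut the solo pool takes.
    def on_apply(self, target, args=None, kwargs=None, callback=None,
                 accept_callback=None, **_):
        return apply_target(target, args or (), kwargs or {},
                            callback or (lambda ret: ret), accept_callback)

pool = InlinePool(limit=1)
pool.start()                        # on_start(); _state = RUN
pool.apply_async(sum, args=((1, 2, 3),),
                 callback=print)    # apply_target calls print(6)
pool.stop()                         # on_stop(); _state = TERMINATE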

celery/concurrency/eventlet.py

@@ -0,0 +1,181 @@
"""Eventlet execution pool."""
import sys
from time import monotonic
from greenlet import GreenletExit
from kombu.asynchronous import timer as _timer
from celery import signals
from . import base
__all__ = ('TaskPool',)
W_RACE = """\
Celery module with %s imported before eventlet patched\
"""
RACE_MODS = ('billiard.', 'celery.', 'kombu.')
#: Warn if we couldn't patch early enough,
#: and thread/socket depending celery modules have already been loaded.
for mod in (m for name, m in list(sys.modules.items())
            if name.startswith(RACE_MODS)):
    for side in ('thread', 'threading', 'socket'):  # pragma: no cover
        if getattr(mod, side, None):
            import warnings
            warnings.warn(RuntimeWarning(W_RACE % side))
def apply_target(target, args=(), kwargs=None, callback=None,
accept_callback=None, getpid=None):
kwargs = {} if not kwargs else kwargs
return base.apply_target(target, args, kwargs, callback, accept_callback,
pid=getpid())
class Timer(_timer.Timer):
"""Eventlet Timer."""
def __init__(self, *args, **kwargs):
from eventlet.greenthread import spawn_after
from greenlet import GreenletExit
super().__init__(*args, **kwargs)
self.GreenletExit = GreenletExit
self._spawn_after = spawn_after
self._queue = set()
def _enter(self, eta, priority, entry, **kwargs):
secs = max(eta - monotonic(), 0)
g = self._spawn_after(secs, entry)
self._queue.add(g)
g.link(self._entry_exit, entry)
g.entry = entry
g.eta = eta
g.priority = priority
g.canceled = False
return g
def _entry_exit(self, g, entry):
try:
try:
g.wait()
except self.GreenletExit:
entry.cancel()
g.canceled = True
finally:
self._queue.discard(g)
def clear(self):
queue = self._queue
while queue:
try:
queue.pop().cancel()
except (KeyError, self.GreenletExit):
pass
def cancel(self, tref):
try:
tref.cancel()
except self.GreenletExit:
pass
@property
def queue(self):
return self._queue
class TaskPool(base.BasePool):
"""Eventlet Task Pool."""
Timer = Timer
signal_safe = False
is_green = True
task_join_will_block = False
_pool = None
_pool_map = None
_quick_put = None
def __init__(self, *args, **kwargs):
from eventlet import greenthread
from eventlet.greenpool import GreenPool
self.Pool = GreenPool
self.getcurrent = greenthread.getcurrent
self.getpid = lambda: id(greenthread.getcurrent())
self.spawn_n = greenthread.spawn_n
super().__init__(*args, **kwargs)
def on_start(self):
self._pool = self.Pool(self.limit)
self._pool_map = {}
signals.eventlet_pool_started.send(sender=self)
self._quick_put = self._pool.spawn
self._quick_apply_sig = signals.eventlet_pool_apply.send
def on_stop(self):
signals.eventlet_pool_preshutdown.send(sender=self)
if self._pool is not None:
self._pool.waitall()
signals.eventlet_pool_postshutdown.send(sender=self)
def on_apply(self, target, args=None, kwargs=None, callback=None,
accept_callback=None, **_):
target = TaskPool._make_killable_target(target)
        self._quick_apply_sig(sender=self, target=target, args=args, kwargs=kwargs)
greenlet = self._quick_put(
apply_target,
target, args,
kwargs,
callback,
accept_callback,
self.getpid
)
self._add_to_pool_map(id(greenlet), greenlet)
def grow(self, n=1):
limit = self.limit + n
self._pool.resize(limit)
self.limit = limit
def shrink(self, n=1):
limit = self.limit - n
self._pool.resize(limit)
self.limit = limit
def terminate_job(self, pid, signal=None):
        if pid in self._pool_map:
greenlet = self._pool_map[pid]
greenlet.kill()
greenlet.wait()
def _get_info(self):
info = super()._get_info()
info.update({
'max-concurrency': self.limit,
'free-threads': self._pool.free(),
'running-threads': self._pool.running(),
})
return info
@staticmethod
def _make_killable_target(target):
def killable_target(*args, **kwargs):
try:
return target(*args, **kwargs)
except GreenletExit:
return (False, None, None)
return killable_target
def _add_to_pool_map(self, pid, greenlet):
self._pool_map[pid] = greenlet
greenlet.link(
TaskPool._cleanup_after_job_finish,
self._pool_map,
pid
)
@staticmethod
def _cleanup_after_job_finish(greenlet, pool_map, pid):
del pool_map[pid]
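
Note that the "pid" this pool traffics in is id(greenlet), not an OS pid: on_apply stores each spawned green thread in _pool_map under that id, and terminate_job kills and waits on the matching entry. A standalone sketch of the same kill flow, assuming eventlet is installed (the sleeping job is illustrative):

import eventlet
eventlet.monkey_patch()

from eventlet.greenpool import GreenPool

pool = GreenPool(2)
g = pool.spawn(eventlet.sleep, 60)   # ~ self._quick_put(apply_target, ...)
pool_map = {id(g): g}                # ~ self._add_to_pool_map(id(greenlet), greenlet)

victim = pool_map[id(g)]
victim.kill()                        # raises GreenletExit inside the job
victim.wait()                        # ~ terminate_job waiting for the unwind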

celery/concurrency/gevent.py

@@ -0,0 +1,171 @@
"""Gevent execution pool."""
import functools
import types
from time import monotonic
from kombu.asynchronous import timer as _timer
from . import base
try:
from gevent import Timeout
except ImportError:
Timeout = None
__all__ = ('TaskPool',)
# pylint: disable=redefined-outer-name
# We cache globals and attribute lookups, so disable this warning.
def apply_target(target, args=(), kwargs=None, callback=None,
accept_callback=None, getpid=None, **_):
kwargs = {} if not kwargs else kwargs
return base.apply_target(target, args, kwargs, callback, accept_callback,
pid=getpid(), **_)
def apply_timeout(target, args=(), kwargs=None, callback=None,
accept_callback=None, getpid=None, timeout=None,
timeout_callback=None, Timeout=Timeout,
apply_target=base.apply_target, **rest):
kwargs = {} if not kwargs else kwargs
try:
with Timeout(timeout):
return apply_target(target, args, kwargs, callback,
accept_callback, getpid(),
propagate=(Timeout,), **rest)
except Timeout:
return timeout_callback(False, timeout)
class Timer(_timer.Timer):
def __init__(self, *args, **kwargs):
from gevent import Greenlet, GreenletExit
class _Greenlet(Greenlet):
cancel = Greenlet.kill
self._Greenlet = _Greenlet
self._GreenletExit = GreenletExit
super().__init__(*args, **kwargs)
self._queue = set()
def _enter(self, eta, priority, entry, **kwargs):
secs = max(eta - monotonic(), 0)
g = self._Greenlet.spawn_later(secs, entry)
self._queue.add(g)
g.link(self._entry_exit)
g.entry = entry
g.eta = eta
g.priority = priority
g.canceled = False
return g
def _entry_exit(self, g):
try:
g.kill()
finally:
self._queue.discard(g)
def clear(self):
queue = self._queue
while queue:
try:
queue.pop().kill()
except KeyError:
pass
@property
def queue(self):
return self._queue
class TaskPool(base.BasePool):
"""GEvent Pool."""
Timer = Timer
signal_safe = False
is_green = True
task_join_will_block = False
_pool = None
_pool_map = None
_quick_put = None
def __init__(self, *args, **kwargs):
from gevent import getcurrent, spawn_raw
from gevent.pool import Pool
self.Pool = Pool
self.getcurrent = getcurrent
self.getpid = lambda: id(getcurrent())
self.spawn_n = spawn_raw
self.timeout = kwargs.get('timeout')
super().__init__(*args, **kwargs)
def on_start(self):
self._pool = self.Pool(self.limit)
self._pool_map = {}
self._quick_put = self._pool.spawn
def on_stop(self):
if self._pool is not None:
self._pool.join()
def on_apply(self, target, args=None, kwargs=None, callback=None,
accept_callback=None, timeout=None,
timeout_callback=None, apply_target=apply_target, **_):
timeout = self.timeout if timeout is None else timeout
target = self._make_killable_target(target)
greenlet = self._quick_put(apply_timeout if timeout else apply_target,
target, args, kwargs, callback, accept_callback,
self.getpid, timeout=timeout, timeout_callback=timeout_callback)
self._add_to_pool_map(id(greenlet), greenlet)
greenlet.terminate = types.MethodType(_terminate, greenlet)
return greenlet
def grow(self, n=1):
self._pool._semaphore.counter += n
self._pool.size += n
def shrink(self, n=1):
self._pool._semaphore.counter -= n
self._pool.size -= n
def terminate_job(self, pid, signal=None):
import gevent
if pid in self._pool_map:
greenlet = self._pool_map[pid]
gevent.kill(greenlet)
@property
def num_processes(self):
return len(self._pool)
@staticmethod
def _make_killable_target(target):
def killable_target(*args, **kwargs):
from greenlet import GreenletExit
try:
return target(*args, **kwargs)
except GreenletExit:
return (False, None, None)
return killable_target
def _add_to_pool_map(self, pid, greenlet):
self._pool_map[pid] = greenlet
greenlet.link(
functools.partial(self._cleanup_after_job_finish, pid=pid, pool_map=self._pool_map),
)
@staticmethod
def _cleanup_after_job_finish(greenlet, pool_map, pid):
del pool_map[pid]
def _terminate(self, signal):
# Done in `TaskPool.terminate_job`
pass
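
The soft-timeout path above hinges on gevent.Timeout used as a context manager: when the timer fires it raises Timeout inside the block, and apply_timeout converts that into a timeout_callback call. A minimal sketch of the pattern, assuming gevent is installed (durations are illustrative):

import gevent
from gevent import Timeout

def slow_job():
    gevent.sleep(5)

try:
    with Timeout(0.1):            # ~ with Timeout(timeout): in apply_timeout
        slow_job()
except Timeout:
    print('soft time limit hit')  # ~ timeout_callback(False, timeout)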

celery/concurrency/prefork.py

@@ -0,0 +1,172 @@
"""Prefork execution pool.
Pool implementation using :mod:`multiprocessing`.
"""
import os
from billiard import forking_enable
from billiard.common import REMAP_SIGTERM, TERM_SIGNAME
from billiard.pool import CLOSE, RUN
from billiard.pool import Pool as BlockingPool
from celery import platforms, signals
from celery._state import _set_task_join_will_block, set_default_app
from celery.app import trace
from celery.concurrency.base import BasePool
from celery.utils.functional import noop
from celery.utils.log import get_logger
from .asynpool import AsynPool
__all__ = ('TaskPool', 'process_initializer', 'process_destructor')
#: Set of signals to reset when a child process starts.
WORKER_SIGRESET = {
'SIGTERM', 'SIGHUP', 'SIGTTIN', 'SIGTTOU', 'SIGUSR1',
}
#: Set of signals to ignore when a child process starts.
if REMAP_SIGTERM:
WORKER_SIGIGNORE = {'SIGINT', TERM_SIGNAME}
else:
WORKER_SIGIGNORE = {'SIGINT'}
logger = get_logger(__name__)
warning, debug = logger.warning, logger.debug
def process_initializer(app, hostname):
"""Pool child process initializer.
Initialize the child pool process to ensure the correct
app instance is used and things like logging works.
"""
# Each running worker gets SIGKILL by OS when main process exits.
platforms.set_pdeathsig('SIGKILL')
_set_task_join_will_block(True)
platforms.signals.reset(*WORKER_SIGRESET)
platforms.signals.ignore(*WORKER_SIGIGNORE)
platforms.set_mp_process_title('celeryd', hostname=hostname)
# This is for Windows and other platforms not supporting
# fork(). Note that init_worker makes sure it's only
# run once per process.
app.loader.init_worker()
app.loader.init_worker_process()
logfile = os.environ.get('CELERY_LOG_FILE') or None
if logfile and '%i' in logfile.lower():
# logfile path will differ so need to set up logging again.
app.log.already_setup = False
app.log.setup(int(os.environ.get('CELERY_LOG_LEVEL', 0) or 0),
logfile,
bool(os.environ.get('CELERY_LOG_REDIRECT', False)),
str(os.environ.get('CELERY_LOG_REDIRECT_LEVEL')),
hostname=hostname)
if os.environ.get('FORKED_BY_MULTIPROCESSING'):
# pool did execv after fork
trace.setup_worker_optimizations(app, hostname)
else:
app.set_current()
set_default_app(app)
app.finalize()
trace._tasks = app._tasks # enables fast_trace_task optimization.
# rebuild execution handler for all tasks.
from celery.app.trace import build_tracer
for name, task in app.tasks.items():
task.__trace__ = build_tracer(name, task, app.loader, hostname,
app=app)
from celery.worker import state as worker_state
worker_state.reset_state()
signals.worker_process_init.send(sender=None)
def process_destructor(pid, exitcode):
"""Pool child process destructor.
Dispatch the :signal:`worker_process_shutdown` signal.
"""
signals.worker_process_shutdown.send(
sender=None, pid=pid, exitcode=exitcode,
)
class TaskPool(BasePool):
"""Multiprocessing Pool implementation."""
Pool = AsynPool
BlockingPool = BlockingPool
uses_semaphore = True
write_stats = None
def on_start(self):
forking_enable(self.forking_enable)
Pool = (self.BlockingPool if self.options.get('threads', True)
else self.Pool)
proc_alive_timeout = (
self.app.conf.worker_proc_alive_timeout if self.app
else None
)
P = self._pool = Pool(processes=self.limit,
initializer=process_initializer,
on_process_exit=process_destructor,
enable_timeouts=True,
synack=False,
proc_alive_timeout=proc_alive_timeout,
**self.options)
# Create proxy methods
self.on_apply = P.apply_async
self.maintain_pool = P.maintain_pool
self.terminate_job = P.terminate_job
self.grow = P.grow
self.shrink = P.shrink
self.flush = getattr(P, 'flush', None) # FIXME add to billiard
def restart(self):
self._pool.restart()
self._pool.apply_async(noop)
def did_start_ok(self):
return self._pool.did_start_ok()
def register_with_event_loop(self, loop):
try:
reg = self._pool.register_with_event_loop
except AttributeError:
return
return reg(loop)
def on_stop(self):
"""Gracefully stop the pool."""
if self._pool is not None and self._pool._state in (RUN, CLOSE):
self._pool.close()
self._pool.join()
self._pool = None
def on_terminate(self):
"""Force terminate the pool."""
if self._pool is not None:
self._pool.terminate()
self._pool = None
def on_close(self):
if self._pool is not None and self._pool._state == RUN:
self._pool.close()
def _get_info(self):
write_stats = getattr(self._pool, 'human_write_stats', None)
info = super()._get_info()
info.update({
'max-concurrency': self.limit,
'processes': [p.pid for p in self._pool._pool],
'max-tasks-per-child': self._pool._maxtasksperchild or 'N/A',
'put-guarded-by-semaphore': self.putlocks,
'timeouts': (self._pool.soft_timeout or 0,
self._pool.timeout or 0),
'writes': write_stats() if write_stats is not None else 'N/A',
})
return info
@property
def num_processes(self):
return self._pool._processes
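
process_initializer configures each child process from environment variables set by the parent worker. The values below are illustrative only and show the contract the code above reads; a '%i'/'%I' token in CELERY_LOG_FILE triggers per-child log re-setup, and FORKED_BY_MULTIPROCESSING is set by billiard itself when the child was re-executed rather than plainly forked.

import os

os.environ['CELERY_LOG_FILE'] = '/var/log/celery/worker-%I.log'  # per-child path: forces log re-setup
os.environ['CELERY_LOG_LEVEL'] = '10'                  # numeric logging level (10 = DEBUG)
os.environ['CELERY_LOG_REDIRECT'] = '1'                # redirect stdout/stderr into logging
os.environ['CELERY_LOG_REDIRECT_LEVEL'] = 'WARNING'    # level used for redirected output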

celery/concurrency/solo.py

@@ -0,0 +1,31 @@
"""Single-threaded execution pool."""
import os
from celery import signals
from .base import BasePool, apply_target
__all__ = ('TaskPool',)
class TaskPool(BasePool):
"""Solo task pool (blocking, inline, fast)."""
body_can_be_buffer = True
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.on_apply = apply_target
self.limit = 1
signals.worker_process_init.send(sender=None)
def _get_info(self):
info = super()._get_info()
info.update({
'max-concurrency': 1,
'processes': [os.getpid()],
'max-tasks-per-child': None,
'put-guarded-by-semaphore': True,
'timeouts': (),
})
return info
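
Since on_apply is bound straight to apply_target, a solo-pool job is just an inline call plus callbacks. A small sketch, assuming celery is importable (the callbacks are illustrative):

from celery.concurrency.solo import TaskPool

results = []
pool = TaskPool()
pool.on_apply(pow, (2, 10), {},
              callback=results.append,              # receives the return value
              accept_callback=lambda pid, t: None)  # receives (pid, monotonic time)
print(results)   # [1024]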

celery/concurrency/thread.py

@@ -0,0 +1,64 @@
"""Thread execution pool."""
from __future__ import annotations
from concurrent.futures import Future, ThreadPoolExecutor, wait
from typing import TYPE_CHECKING, Any, Callable
from .base import BasePool, apply_target
__all__ = ('TaskPool',)
if TYPE_CHECKING:
from typing import TypedDict
PoolInfo = TypedDict('PoolInfo', {'max-concurrency': int, 'threads': int})
# `TargetFunction` should be a Protocol that represents fast_trace_task and
# trace_task_ret.
TargetFunction = Callable[..., Any]
class ApplyResult:
def __init__(self, future: Future) -> None:
self.f = future
self.get = self.f.result
def wait(self, timeout: float | None = None) -> None:
wait([self.f], timeout)
class TaskPool(BasePool):
"""Thread Task Pool."""
limit: int
body_can_be_buffer = True
signal_safe = False
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.executor = ThreadPoolExecutor(max_workers=self.limit)
def on_stop(self) -> None:
self.executor.shutdown()
super().on_stop()
def on_apply(
self,
target: TargetFunction,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
callback: Callable[..., Any] | None = None,
accept_callback: Callable[..., Any] | None = None,
**_: Any
) -> ApplyResult:
f = self.executor.submit(apply_target, target, args, kwargs,
callback, accept_callback)
return ApplyResult(f)
def _get_info(self) -> PoolInfo:
info = super()._get_info()
info.update({
'max-concurrency': self.limit,
'threads': len(self.executor._threads)
})
return info
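
ApplyResult adapts a concurrent.futures.Future to the get()/wait() interface the worker expects from pool results. A standalone sketch using the stdlib executor directly:

from concurrent.futures import ThreadPoolExecutor

from celery.concurrency.thread import ApplyResult

with ThreadPoolExecutor(max_workers=1) as executor:
    res = ApplyResult(executor.submit(sum, (1, 2, 3)))
    res.wait(timeout=1.0)   # wait([future], timeout) under the hood
    print(res.get())        # prints 6; .get is bound to Future.result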