This commit is contained in:
Iliyan Angelov
2025-09-19 11:58:53 +03:00
parent 306b20e24a
commit 6b247e5b9f
11423 changed files with 1500615 additions and 778 deletions

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python
from . import (
exposition, gc_collector, metrics, metrics_core, platform_collector,
process_collector, registry,
)
from .exposition import (
CONTENT_TYPE_LATEST, delete_from_gateway, generate_latest,
instance_ip_grouping_key, make_asgi_app, make_wsgi_app, MetricsHandler,
push_to_gateway, pushadd_to_gateway, start_http_server, start_wsgi_server,
write_to_textfile,
)
from .gc_collector import GC_COLLECTOR, GCCollector
from .metrics import (
Counter, disable_created_metrics, enable_created_metrics, Enum, Gauge,
Histogram, Info, Summary,
)
from .metrics_core import Metric
from .platform_collector import PLATFORM_COLLECTOR, PlatformCollector
from .process_collector import PROCESS_COLLECTOR, ProcessCollector
from .registry import CollectorRegistry, REGISTRY
__all__ = (
'CollectorRegistry',
'REGISTRY',
'Metric',
'Counter',
'Gauge',
'Summary',
'Histogram',
'Info',
'Enum',
'enable_created_metrics',
'disable_created_metrics',
'CONTENT_TYPE_LATEST',
'generate_latest',
'MetricsHandler',
'make_wsgi_app',
'make_asgi_app',
'start_http_server',
'start_wsgi_server',
'write_to_textfile',
'push_to_gateway',
'pushadd_to_gateway',
'delete_from_gateway',
'instance_ip_grouping_key',
'ProcessCollector',
'PROCESS_COLLECTOR',
'PlatformCollector',
'PLATFORM_COLLECTOR',
'GCCollector',
'GC_COLLECTOR',
)
if __name__ == '__main__':
c = Counter('cc', 'A counter')
c.inc()
g = Gauge('gg', 'A gauge')
g.set(17)
s = Summary('ss', 'A summary', ['a', 'b'])
s.labels('c', 'd').observe(17)
h = Histogram('hh', 'A histogram')
h.observe(.6)
start_http_server(8000)
import time
while True:
time.sleep(1)

View File

@@ -0,0 +1,40 @@
from typing import Callable
from urllib.parse import parse_qs
from .exposition import _bake_output
from .registry import CollectorRegistry, REGISTRY
def make_asgi_app(registry: CollectorRegistry = REGISTRY, disable_compression: bool = False) -> Callable:
"""Create a ASGI app which serves the metrics from a registry."""
async def prometheus_app(scope, receive, send):
assert scope.get("type") == "http"
# Prepare parameters
params = parse_qs(scope.get('query_string', b''))
accept_header = ",".join([
value.decode("utf8") for (name, value) in scope.get('headers')
if name.decode("utf8").lower() == 'accept'
])
accept_encoding_header = ",".join([
value.decode("utf8") for (name, value) in scope.get('headers')
if name.decode("utf8").lower() == 'accept-encoding'
])
# Bake output
status, headers, output = _bake_output(registry, accept_header, accept_encoding_header, params, disable_compression)
formatted_headers = []
for header in headers:
formatted_headers.append(tuple(x.encode('utf8') for x in header))
# Return output
payload = await receive()
if payload.get("type") == "http.request":
await send(
{
"type": "http.response.start",
"status": int(status.split(' ')[0]),
"headers": formatted_headers,
}
)
await send({"type": "http.response.body", "body": output})
return prometheus_app
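if __name__ == '__main__':
    # A minimal usage sketch, assuming the third-party ASGI server uvicorn
    # is installed (it is not a dependency of this package). The app can
    # also be mounted under an existing ASGI framework route, e.g.
    # Starlette's app.mount("/metrics", make_asgi_app()).
    import uvicorn
    uvicorn.run(make_asgi_app(), host='0.0.0.0', port=8000)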

View File

@@ -0,0 +1,94 @@
#!/usr/bin/env python
import logging
import re
import socket
import threading
import time
from timeit import default_timer
from typing import Callable, Tuple
from ..registry import CollectorRegistry, REGISTRY
# Roughly, we have to keep to what works as a file name.
# We also replace periods, so label parts can still be distinguished.
_INVALID_GRAPHITE_CHARS = re.compile(r"[^a-zA-Z0-9_-]")
def _sanitize(s):
return _INVALID_GRAPHITE_CHARS.sub('_', s)
class _RegularPush(threading.Thread):
def __init__(self, pusher, interval, prefix):
super().__init__()
self._pusher = pusher
self._interval = interval
self._prefix = prefix
def run(self):
wait_until = default_timer()
while True:
while True:
now = default_timer()
if now >= wait_until:
# May need to skip some pushes.
while wait_until < now:
wait_until += self._interval
break
# time.sleep can return early.
time.sleep(wait_until - now)
try:
self._pusher.push(prefix=self._prefix)
except OSError:
logging.exception("Push failed")
class GraphiteBridge:
def __init__(self,
address: Tuple[str, int],
registry: CollectorRegistry = REGISTRY,
timeout_seconds: float = 30,
_timer: Callable[[], float] = time.time,
tags: bool = False,
):
self._address = address
self._registry = registry
self._tags = tags
self._timeout = timeout_seconds
self._timer = _timer
def push(self, prefix: str = '') -> None:
now = int(self._timer())
output = []
prefixstr = ''
if prefix:
prefixstr = prefix + '.'
for metric in self._registry.collect():
for s in metric.samples:
if s.labels:
if self._tags:
sep = ';'
fmt = '{0}={1}'
else:
sep = '.'
fmt = '{0}.{1}'
labelstr = sep + sep.join(
[fmt.format(
_sanitize(k), _sanitize(v))
for k, v in sorted(s.labels.items())])
else:
labelstr = ''
output.append(f'{prefixstr}{_sanitize(s.name)}{labelstr} {float(s.value)} {now}\n')
conn = socket.create_connection(self._address, self._timeout)
conn.sendall(''.join(output).encode('ascii'))
conn.close()
def start(self, interval: float = 60.0, prefix: str = '') -> None:
t = _RegularPush(self, interval, prefix)
t.daemon = True
t.start()
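if __name__ == '__main__':
    # A minimal usage sketch, assuming a Graphite server is listening on
    # localhost:2003 (plaintext protocol): push once, then keep pushing
    # every 10 seconds from a background daemon thread.
    from prometheus_client import Counter
    c = Counter('demo_events', 'Events seen by the demo')
    c.inc()
    gb = GraphiteBridge(('localhost', 2003))
    gb.push(prefix='demo')                   # one-off push
    gb.start(interval=10.0, prefix='demo')   # periodic background pushes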

View File

@@ -0,0 +1,82 @@
from timeit import default_timer
from types import TracebackType
from typing import (
Any, Callable, Literal, Optional, Tuple, Type, TYPE_CHECKING, TypeVar,
Union,
)
from .decorator import decorate
if TYPE_CHECKING:
from . import Counter
F = TypeVar("F", bound=Callable[..., Any])
class ExceptionCounter:
def __init__(self, counter: "Counter", exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]]) -> None:
self._counter = counter
self._exception = exception
def __enter__(self) -> None:
pass
def __exit__(self, typ: Optional[Type[BaseException]], value: Optional[BaseException], traceback: Optional[TracebackType]) -> Literal[False]:
if isinstance(value, self._exception):
self._counter.inc()
return False
def __call__(self, f: "F") -> "F":
def wrapped(func, *args, **kwargs):
with self:
return func(*args, **kwargs)
return decorate(f, wrapped)
class InprogressTracker:
def __init__(self, gauge):
self._gauge = gauge
def __enter__(self):
self._gauge.inc()
def __exit__(self, typ, value, traceback):
self._gauge.dec()
def __call__(self, f: "F") -> "F":
def wrapped(func, *args, **kwargs):
with self:
return func(*args, **kwargs)
return decorate(f, wrapped)
class Timer:
def __init__(self, metric, callback_name):
self._metric = metric
self._callback_name = callback_name
def _new_timer(self):
return self.__class__(self._metric, self._callback_name)
def __enter__(self):
self._start = default_timer()
return self
def __exit__(self, typ, value, traceback):
# Time can go backwards.
duration = max(default_timer() - self._start, 0)
callback = getattr(self._metric, self._callback_name)
callback(duration)
def labels(self, *args, **kw):
self._metric = self._metric.labels(*args, **kw)
def __call__(self, f: "F") -> "F":
def wrapped(func, *args, **kwargs):
# Obtaining new instance of timer every time
# ensures thread safety and reentrancy.
with self._new_timer():
return func(*args, **kwargs)
return decorate(f, wrapped)
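if __name__ == '__main__':
    # A minimal sketch of how these helpers are normally obtained through
    # the metric API rather than constructed directly (hypothetical names).
    from prometheus_client import Counter, Gauge, Summary
    errors = Counter('demo_errors', 'Errors raised by the demo handler')
    inprogress = Gauge('demo_inprogress', 'In-progress demo calls')
    latency = Summary('demo_latency_seconds', 'Demo call latency')

    @errors.count_exceptions(ValueError)  # ExceptionCounter as a decorator
    @inprogress.track_inprogress()        # InprogressTracker as a decorator
    @latency.time()                       # Timer as a decorator
    def handler():
        pass

    handler()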

View File

@@ -0,0 +1,34 @@
from .metrics import Counter, Enum, Gauge, Histogram, Info, Summary
from .metrics_core import (
CounterMetricFamily, GaugeHistogramMetricFamily, GaugeMetricFamily,
HistogramMetricFamily, InfoMetricFamily, Metric, StateSetMetricFamily,
SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
)
from .registry import CollectorRegistry, REGISTRY
from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
__all__ = (
'BucketSpan',
'CollectorRegistry',
'Counter',
'CounterMetricFamily',
'Enum',
'Exemplar',
'Gauge',
'GaugeHistogramMetricFamily',
'GaugeMetricFamily',
'Histogram',
'HistogramMetricFamily',
'Info',
'InfoMetricFamily',
'Metric',
'NativeHistogram',
'REGISTRY',
'Sample',
'StateSetMetricFamily',
'Summary',
'SummaryMetricFamily',
'Timestamp',
'UnknownMetricFamily',
'UntypedMetricFamily',
)

View File

@@ -0,0 +1,427 @@
# ######################### LICENSE ############################ #
# Copyright (c) 2005-2016, Michele Simionato
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# Redistributions in bytecode form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
"""
Decorator module, see http://pypi.python.org/pypi/decorator
for the documentation.
"""
from __future__ import print_function
import collections
import inspect
import itertools
import operator
import re
import sys
__version__ = '4.0.10'
if sys.version_info >= (3,):
from inspect import getfullargspec
def get_init(cls):
return cls.__init__
else:
class getfullargspec(object):
"A quick and dirty replacement for getfullargspec for Python 2.X"
def __init__(self, f):
self.args, self.varargs, self.varkw, self.defaults = \
inspect.getargspec(f)
self.kwonlyargs = []
self.kwonlydefaults = None
def __iter__(self):
yield self.args
yield self.varargs
yield self.varkw
yield self.defaults
getargspec = inspect.getargspec
def get_init(cls):
return cls.__init__.__func__
# getargspec has been deprecated in Python 3.5
ArgSpec = collections.namedtuple(
'ArgSpec', 'args varargs varkw defaults')
def getargspec(f):
"""A replacement for inspect.getargspec"""
spec = getfullargspec(f)
return ArgSpec(spec.args, spec.varargs, spec.varkw, spec.defaults)
DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(')
# basic functionality
class FunctionMaker(object):
"""
An object with the ability to create functions with a given signature.
It has attributes name, doc, module, signature, defaults, dict and
methods update and make.
"""
# Atomic get-and-increment provided by the GIL
_compile_count = itertools.count()
def __init__(self, func=None, name=None, signature=None,
defaults=None, doc=None, module=None, funcdict=None):
self.shortsignature = signature
if func:
# func can be a class or a callable, but not an instance method
self.name = func.__name__
if self.name == '<lambda>': # small hack for lambda functions
self.name = '_lambda_'
self.doc = func.__doc__
self.module = func.__module__
if inspect.isfunction(func):
argspec = getfullargspec(func)
self.annotations = getattr(func, '__annotations__', {})
for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs',
'kwonlydefaults'):
setattr(self, a, getattr(argspec, a))
for i, arg in enumerate(self.args):
setattr(self, 'arg%d' % i, arg)
if sys.version_info < (3,): # easy way
self.shortsignature = self.signature = (
inspect.formatargspec(
formatvalue=lambda val: "", *argspec)[1:-1])
else: # Python 3 way
allargs = list(self.args)
allshortargs = list(self.args)
if self.varargs:
allargs.append('*' + self.varargs)
allshortargs.append('*' + self.varargs)
elif self.kwonlyargs:
allargs.append('*') # single star syntax
for a in self.kwonlyargs:
allargs.append('%s=None' % a)
allshortargs.append('%s=%s' % (a, a))
if self.varkw:
allargs.append('**' + self.varkw)
allshortargs.append('**' + self.varkw)
self.signature = ', '.join(allargs)
self.shortsignature = ', '.join(allshortargs)
self.dict = func.__dict__.copy()
# func=None happens when decorating a caller
if name:
self.name = name
if signature is not None:
self.signature = signature
if defaults:
self.defaults = defaults
if doc:
self.doc = doc
if module:
self.module = module
if funcdict:
self.dict = funcdict
        # check existence of required attributes
        assert hasattr(self, 'name')
        if not hasattr(self, 'signature'):
            raise TypeError('You are decorating a non-function: %s' % func)
def update(self, func, **kw):
"Update the signature of func with the data in self"
func.__name__ = self.name
func.__doc__ = getattr(self, 'doc', None)
func.__dict__ = getattr(self, 'dict', {})
func.__defaults__ = getattr(self, 'defaults', ())
func.__kwdefaults__ = getattr(self, 'kwonlydefaults', None)
func.__annotations__ = getattr(self, 'annotations', None)
try:
frame = sys._getframe(3)
except AttributeError: # for IronPython and similar implementations
callermodule = '?'
else:
callermodule = frame.f_globals.get('__name__', '?')
func.__module__ = getattr(self, 'module', callermodule)
func.__dict__.update(kw)
def make(self, src_templ, evaldict=None, addsource=False, **attrs):
"Make a new function from a given template and update the signature"
src = src_templ % vars(self) # expand name and signature
evaldict = evaldict or {}
mo = DEF.match(src)
if mo is None:
raise SyntaxError('not a valid function template\n%s' % src)
name = mo.group(1) # extract the function name
names = set([name] + [arg.strip(' *') for arg in
self.shortsignature.split(',')])
for n in names:
if n in ('_func_', '_call_'):
raise NameError('%s is overridden in\n%s' % (n, src))
if not src.endswith('\n'): # add a newline for old Pythons
src += '\n'
# Ensure each generated function has a unique filename for profilers
# (such as cProfile) that depend on the tuple of (<filename>,
# <definition line>, <function name>) being unique.
filename = '<decorator-gen-%d>' % (next(self._compile_count),)
try:
code = compile(src, filename, 'single')
exec(code, evaldict)
except:
print('Error in generated code:', file=sys.stderr)
print(src, file=sys.stderr)
raise
func = evaldict[name]
if addsource:
attrs['__source__'] = src
self.update(func, **attrs)
return func
@classmethod
def create(cls, obj, body, evaldict, defaults=None,
doc=None, module=None, addsource=True, **attrs):
"""
Create a function from the strings name, signature and body.
evaldict is the evaluation dictionary. If addsource is true an
attribute __source__ is added to the result. The attributes attrs
are added, if any.
"""
if isinstance(obj, str): # "name(signature)"
name, rest = obj.strip().split('(', 1)
            signature = rest[:-1]  # strip the trailing right parenthesis
func = None
else: # a function
name = None
signature = None
func = obj
self = cls(func, name, signature, defaults, doc, module)
ibody = '\n'.join(' ' + line for line in body.splitlines())
return self.make('def %(name)s(%(signature)s):\n' + ibody,
evaldict, addsource, **attrs)
def decorate(func, caller):
"""
decorate(func, caller) decorates a function using a caller.
"""
evaldict = dict(_call_=caller, _func_=func)
fun = FunctionMaker.create(
func, "return _call_(_func_, %(shortsignature)s)",
evaldict, __wrapped__=func)
if hasattr(func, '__qualname__'):
fun.__qualname__ = func.__qualname__
return fun
def decorator(caller, _func=None):
"""decorator(caller) converts a caller function into a decorator"""
if _func is not None: # return a decorated function
# this is obsolete behavior; you should use decorate instead
return decorate(_func, caller)
# else return a decorator function
if inspect.isclass(caller):
name = caller.__name__.lower()
doc = 'decorator(%s) converts functions/generators into ' \
'factories of %s objects' % (caller.__name__, caller.__name__)
elif inspect.isfunction(caller):
if caller.__name__ == '<lambda>':
name = '_lambda_'
else:
name = caller.__name__
doc = caller.__doc__
else: # assume caller is an object with a __call__ method
name = caller.__class__.__name__.lower()
doc = caller.__call__.__doc__
evaldict = dict(_call_=caller, _decorate_=decorate)
return FunctionMaker.create(
'%s(func)' % name, 'return _decorate_(func, _call_)',
evaldict, doc=doc, module=caller.__module__,
__wrapped__=caller)
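# A minimal usage sketch for decorate() (hypothetical names): wrap a function
# with a caller while preserving its signature, which a plain closure-based
# decorator would lose.
#
#   def log_calls(func, *args, **kwargs):
#       print('calling', func.__name__)
#       return func(*args, **kwargs)
#
#   def greet(name, punctuation='!'):
#       return 'Hello, ' + name + punctuation
#
#   logged_greet = decorate(greet, log_calls)
#   logged_greet('world')   # prints "calling greet"; returns 'Hello, world!'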
# ####################### contextmanager ####################### #
try: # Python >= 3.2
from contextlib import _GeneratorContextManager
except ImportError: # Python >= 2.5
from contextlib import GeneratorContextManager as _GeneratorContextManager
class ContextManager(_GeneratorContextManager):
def __call__(self, func):
"""Context manager decorator"""
return FunctionMaker.create(
func, "with _self_: return _func_(%(shortsignature)s)",
dict(_self_=self, _func_=func), __wrapped__=func)
init = getfullargspec(_GeneratorContextManager.__init__)
n_args = len(init.args)
if n_args == 2 and not init.varargs: # (self, genobj) Python 2.7
def __init__(self, g, *a, **k):
return _GeneratorContextManager.__init__(self, g(*a, **k))
ContextManager.__init__ = __init__
elif n_args == 2 and init.varargs: # (self, gen, *a, **k) Python 3.4
pass
elif n_args == 4: # (self, gen, args, kwds) Python 3.5
def __init__(self, g, *a, **k):
return _GeneratorContextManager.__init__(self, g, a, k)
ContextManager.__init__ = __init__
contextmanager = decorator(ContextManager)
# ############################ dispatch_on ############################ #
def append(a, vancestors):
"""
Append ``a`` to the list of the virtual ancestors, unless it is already
included.
"""
add = True
for j, va in enumerate(vancestors):
if issubclass(va, a):
add = False
break
if issubclass(a, va):
vancestors[j] = a
add = False
if add:
vancestors.append(a)
# inspired from simplegeneric by P.J. Eby and functools.singledispatch
def dispatch_on(*dispatch_args):
"""
Factory of decorators turning a function into a generic function
dispatching on the given arguments.
"""
assert dispatch_args, 'No dispatch args passed'
dispatch_str = '(%s,)' % ', '.join(dispatch_args)
def check(arguments, wrong=operator.ne, msg=''):
"""Make sure one passes the expected number of arguments"""
if wrong(len(arguments), len(dispatch_args)):
raise TypeError('Expected %d arguments, got %d%s' %
(len(dispatch_args), len(arguments), msg))
def gen_func_dec(func):
"""Decorator turning a function into a generic function"""
# first check the dispatch arguments
argset = set(getfullargspec(func).args)
if not set(dispatch_args) <= argset:
raise NameError('Unknown dispatch arguments %s' % dispatch_str)
typemap = {}
def vancestors(*types):
"""
Get a list of sets of virtual ancestors for the given types
"""
check(types)
ras = [[] for _ in range(len(dispatch_args))]
for types_ in typemap:
for t, type_, ra in zip(types, types_, ras):
if issubclass(t, type_) and type_ not in t.__mro__:
append(type_, ra)
return [set(ra) for ra in ras]
def ancestors(*types):
"""
Get a list of virtual MROs, one for each type
"""
check(types)
lists = []
for t, vas in zip(types, vancestors(*types)):
n_vas = len(vas)
if n_vas > 1:
raise RuntimeError(
'Ambiguous dispatch for %s: %s' % (t, vas))
elif n_vas == 1:
va, = vas
mro = type('t', (t, va), {}).__mro__[1:]
else:
mro = t.__mro__
lists.append(mro[:-1]) # discard t and object
return lists
def register(*types):
"""
Decorator to register an implementation for the given types
"""
check(types)
def dec(f):
check(getfullargspec(f).args, operator.lt, ' in ' + f.__name__)
typemap[types] = f
return f
return dec
def dispatch_info(*types):
"""
            A utility to introspect the dispatch algorithm
"""
check(types)
lst = []
for anc in itertools.product(*ancestors(*types)):
lst.append(tuple(a.__name__ for a in anc))
return lst
def _dispatch(dispatch_args, *args, **kw):
types = tuple(type(arg) for arg in dispatch_args)
try: # fast path
f = typemap[types]
except KeyError:
pass
else:
return f(*args, **kw)
combinations = itertools.product(*ancestors(*types))
next(combinations) # the first one has been already tried
for types_ in combinations:
f = typemap.get(types_)
if f is not None:
return f(*args, **kw)
# else call the default implementation
return func(*args, **kw)
return FunctionMaker.create(
func, 'return _f_(%s, %%(shortsignature)s)' % dispatch_str,
dict(_f_=_dispatch), register=register, default=func,
typemap=typemap, vancestors=vancestors, ancestors=ancestors,
dispatch_info=dispatch_info, __wrapped__=func)
gen_func_dec.__name__ = 'dispatch_on' + dispatch_str
return gen_func_dec
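if __name__ == '__main__':
    # A minimal sketch of dispatch_on (hypothetical names): a generic
    # function dispatching on the type of its single argument.
    @dispatch_on('obj')
    def describe(obj):
        return 'something else'

    @describe.register(int)
    def describe_int(obj):
        return 'an int'

    print(describe(42))    # -> 'an int'
    print(describe('hi'))  # -> 'something else' (falls back to the default)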

View File

@@ -0,0 +1,679 @@
import base64
from contextlib import closing
import gzip
from http.server import BaseHTTPRequestHandler
import os
import socket
from socketserver import ThreadingMixIn
import ssl
import sys
import threading
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
from urllib.error import HTTPError
from urllib.parse import parse_qs, quote_plus, urlparse
from urllib.request import (
BaseHandler, build_opener, HTTPHandler, HTTPRedirectHandler, HTTPSHandler,
Request,
)
from wsgiref.simple_server import make_server, WSGIRequestHandler, WSGIServer
from .openmetrics import exposition as openmetrics
from .registry import CollectorRegistry, REGISTRY
from .utils import floatToGoString
from .validation import _is_valid_legacy_metric_name
__all__ = (
'CONTENT_TYPE_LATEST',
'delete_from_gateway',
'generate_latest',
'instance_ip_grouping_key',
'make_asgi_app',
'make_wsgi_app',
'MetricsHandler',
'push_to_gateway',
'pushadd_to_gateway',
'start_http_server',
'start_wsgi_server',
'write_to_textfile',
)
CONTENT_TYPE_LATEST = 'text/plain; version=0.0.4; charset=utf-8'
"""Content type of the latest text format"""
class _PrometheusRedirectHandler(HTTPRedirectHandler):
"""
Allow additional methods (e.g. PUT) and data forwarding in redirects.
    Use of this class constitutes a user's explicit agreement to the
    redirect responses the Prometheus client will receive when using it.
    You should only use this class if you control or otherwise trust the
    redirect behavior involved and are certain it is safe to fully transfer
the original request (method and data) to the redirected URL. For
example, if you know there is a cosmetic URL redirect in front of a
local deployment of a Prometheus server, and all redirects are safe,
this is the class to use to handle redirects in that case.
The standard HTTPRedirectHandler does not forward request data nor
does it allow redirected PUT requests (which Prometheus uses for some
operations, for example `push_to_gateway`) because these cannot
generically guarantee no violations of HTTP RFC 2616 requirements for
the user to explicitly confirm redirects that could have unexpected
side effects (such as rendering a PUT request non-idempotent or
creating multiple resources not named in the original request).
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""
Apply redirect logic to a request.
See parent HTTPRedirectHandler.redirect_request for parameter info.
If the redirect is disallowed, this raises the corresponding HTTP error.
If the redirect can't be determined, return None to allow other handlers
to try. If the redirect is allowed, return the new request.
This method specialized for the case when (a) the user knows that the
redirect will not cause unacceptable side effects for any request method,
and (b) the user knows that any request data should be passed through to
the redirect. If either condition is not met, this should not be used.
"""
# note that requests being provided by a handler will use get_method to
# indicate the method, by monkeypatching this, instead of setting the
# Request object's method attribute.
m = getattr(req, "method", req.get_method())
if not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m in ("POST", "PUT")):
raise HTTPError(req.full_url, code, msg, headers, fp)
new_request = Request(
newurl.replace(' ', '%20'), # space escaping in new url if needed.
headers=req.headers,
origin_req_host=req.origin_req_host,
unverifiable=True,
data=req.data,
)
new_request.method = m
return new_request
def _bake_output(registry, accept_header, accept_encoding_header, params, disable_compression):
"""Bake output for metrics output."""
# Choose the correct plain text format of the output.
encoder, content_type = choose_encoder(accept_header)
if 'name[]' in params:
registry = registry.restricted_registry(params['name[]'])
output = encoder(registry)
headers = [('Content-Type', content_type)]
# If gzip encoding required, gzip the output.
if not disable_compression and gzip_accepted(accept_encoding_header):
output = gzip.compress(output)
headers.append(('Content-Encoding', 'gzip'))
return '200 OK', headers, output
def make_wsgi_app(registry: CollectorRegistry = REGISTRY, disable_compression: bool = False) -> Callable:
"""Create a WSGI app which serves the metrics from a registry."""
def prometheus_app(environ, start_response):
# Prepare parameters
accept_header = environ.get('HTTP_ACCEPT')
accept_encoding_header = environ.get('HTTP_ACCEPT_ENCODING')
params = parse_qs(environ.get('QUERY_STRING', ''))
method = environ['REQUEST_METHOD']
if method == 'OPTIONS':
status = '200 OK'
headers = [('Allow', 'OPTIONS,GET')]
output = b''
elif method != 'GET':
status = '405 Method Not Allowed'
headers = [('Allow', 'OPTIONS,GET')]
output = '# HTTP {}: {}; use OPTIONS or GET\n'.format(status, method).encode()
elif environ['PATH_INFO'] == '/favicon.ico':
# Serve empty response for browsers
status = '200 OK'
headers = []
output = b''
else:
# Note: For backwards compatibility, the URI path for GET is not
# constrained to the documented /metrics, but any path is allowed.
# Bake output
status, headers, output = _bake_output(registry, accept_header, accept_encoding_header, params, disable_compression)
# Return output
start_response(status, headers)
return [output]
return prometheus_app
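# A minimal usage sketch: serve the WSGI app with the standard library
# (start_wsgi_server below wraps this same pattern in a daemon thread):
#
#   from wsgiref.simple_server import make_server
#   httpd = make_server('', 8000, make_wsgi_app())
#   httpd.serve_forever()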
class _SilentHandler(WSGIRequestHandler):
"""WSGI handler that does not log requests."""
def log_message(self, format, *args):
"""Log nothing."""
class ThreadingWSGIServer(ThreadingMixIn, WSGIServer):
"""Thread per request HTTP server."""
# Make worker threads "fire and forget". Beginning with Python 3.7 this
# prevents a memory leak because ``ThreadingMixIn`` starts to gather all
# non-daemon threads in a list in order to join on them at server close.
daemon_threads = True
def _get_best_family(address, port):
"""Automatically select address family depending on address"""
# HTTPServer defaults to AF_INET, which will not start properly if
# binding an ipv6 address is requested.
# This function is based on what upstream python did for http.server
# in https://github.com/python/cpython/pull/11767
infos = socket.getaddrinfo(address, port, type=socket.SOCK_STREAM, flags=socket.AI_PASSIVE)
family, _, _, _, sockaddr = next(iter(infos))
return family, sockaddr[0]
def _get_ssl_ctx(
certfile: str,
keyfile: str,
protocol: int,
cafile: Optional[str] = None,
capath: Optional[str] = None,
client_auth_required: bool = False,
) -> ssl.SSLContext:
"""Load context supports SSL."""
ssl_cxt = ssl.SSLContext(protocol=protocol)
if cafile is not None or capath is not None:
try:
ssl_cxt.load_verify_locations(cafile, capath)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load CA certificate chain from file "
f"{cafile!r} or directory {capath!r}: {msg}")
else:
try:
ssl_cxt.load_default_certs(purpose=ssl.Purpose.CLIENT_AUTH)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load default CA certificate chain: {msg}")
if client_auth_required:
ssl_cxt.verify_mode = ssl.CERT_REQUIRED
try:
ssl_cxt.load_cert_chain(certfile=certfile, keyfile=keyfile)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load server certificate file {certfile!r} or "
f"its private key file {keyfile!r}: {msg}")
return ssl_cxt
def start_wsgi_server(
port: int,
addr: str = '0.0.0.0',
registry: CollectorRegistry = REGISTRY,
certfile: Optional[str] = None,
keyfile: Optional[str] = None,
client_cafile: Optional[str] = None,
client_capath: Optional[str] = None,
protocol: int = ssl.PROTOCOL_TLS_SERVER,
client_auth_required: bool = False,
) -> Tuple[WSGIServer, threading.Thread]:
"""Starts a WSGI server for prometheus metrics as a daemon thread."""
class TmpServer(ThreadingWSGIServer):
"""Copy of ThreadingWSGIServer to update address_family locally"""
TmpServer.address_family, addr = _get_best_family(addr, port)
app = make_wsgi_app(registry)
httpd = make_server(addr, port, app, TmpServer, handler_class=_SilentHandler)
if certfile and keyfile:
context = _get_ssl_ctx(certfile, keyfile, protocol, client_cafile, client_capath, client_auth_required)
httpd.socket = context.wrap_socket(httpd.socket, server_side=True)
t = threading.Thread(target=httpd.serve_forever)
t.daemon = True
t.start()
return httpd, t
start_http_server = start_wsgi_server
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
"""Returns the metrics from the registry in latest text format as a string."""
def sample_line(samples):
if samples.labels:
labelstr = '{0}'.format(','.join(
['{}="{}"'.format(
openmetrics.escape_label_name(k), openmetrics._escape(v))
for k, v in sorted(samples.labels.items())]))
else:
labelstr = ''
timestamp = ''
if samples.timestamp is not None:
# Convert to milliseconds.
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
if _is_valid_legacy_metric_name(samples.name):
if labelstr:
labelstr = '{{{0}}}'.format(labelstr)
return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
maybe_comma = ''
if labelstr:
maybe_comma = ','
return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
output = []
for metric in registry.collect():
try:
mname = metric.name
mtype = metric.type
# Munging from OpenMetrics into Prometheus format.
if mtype == 'counter':
mname = mname + '_total'
elif mtype == 'info':
mname = mname + '_info'
mtype = 'gauge'
elif mtype == 'stateset':
mtype = 'gauge'
elif mtype == 'gaugehistogram':
# A gauge histogram is really a gauge,
# but this captures the structure better.
mtype = 'histogram'
elif mtype == 'unknown':
mtype = 'untyped'
output.append('# HELP {} {}\n'.format(
openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n')
om_samples: Dict[str, List[str]] = {}
for s in metric.samples:
for suffix in ['_created', '_gsum', '_gcount']:
if s.name == metric.name + suffix:
# OpenMetrics specific sample, put in a gauge at the end.
om_samples.setdefault(suffix, []).append(sample_line(s))
break
else:
output.append(sample_line(s))
except Exception as exception:
exception.args = (exception.args or ('',)) + (metric,)
raise
for suffix, lines in sorted(om_samples.items()):
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix),
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n')
output.extend(lines)
return ''.join(output).encode('utf-8')
def choose_encoder(accept_header: str) -> Tuple[Callable[[CollectorRegistry], bytes], str]:
accept_header = accept_header or ''
for accepted in accept_header.split(','):
if accepted.split(';')[0].strip() == 'application/openmetrics-text':
return (openmetrics.generate_latest,
openmetrics.CONTENT_TYPE_LATEST)
return generate_latest, CONTENT_TYPE_LATEST
def gzip_accepted(accept_encoding_header: str) -> bool:
accept_encoding_header = accept_encoding_header or ''
for accepted in accept_encoding_header.split(','):
if accepted.split(';')[0].strip().lower() == 'gzip':
return True
return False
class MetricsHandler(BaseHTTPRequestHandler):
"""HTTP handler that gives metrics from ``REGISTRY``."""
registry: CollectorRegistry = REGISTRY
def do_GET(self) -> None:
# Prepare parameters
registry = self.registry
accept_header = self.headers.get('Accept')
accept_encoding_header = self.headers.get('Accept-Encoding')
params = parse_qs(urlparse(self.path).query)
# Bake output
status, headers, output = _bake_output(registry, accept_header, accept_encoding_header, params, False)
# Return output
self.send_response(int(status.split(' ')[0]))
for header in headers:
self.send_header(*header)
self.end_headers()
self.wfile.write(output)
def log_message(self, format: str, *args: Any) -> None:
"""Log nothing."""
@classmethod
def factory(cls, registry: CollectorRegistry) -> type:
"""Returns a dynamic MetricsHandler class tied
to the passed registry.
"""
# This implementation relies on MetricsHandler.registry
# (defined above and defaulted to REGISTRY).
# As we have unicode_literals, we need to create a str()
# object for type().
cls_name = str(cls.__name__)
MyMetricsHandler = type(cls_name, (cls, object),
{"registry": registry})
return MyMetricsHandler
def write_to_textfile(path: str, registry: CollectorRegistry) -> None:
"""Write metrics to the given path.
This is intended for use with the Node exporter textfile collector.
The path must end in .prom for the textfile collector to process it."""
tmppath = f'{path}.{os.getpid()}.{threading.current_thread().ident}'
try:
with open(tmppath, 'wb') as f:
f.write(generate_latest(registry))
# rename(2) is atomic but fails on Windows if the destination file exists
if os.name == 'nt':
os.replace(tmppath, path)
else:
os.rename(tmppath, path)
except Exception:
if os.path.exists(tmppath):
os.remove(tmppath)
raise
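# A minimal usage sketch, e.g. from a cron job feeding the node_exporter
# textfile collector (the output directory is an assumption):
#
#   from prometheus_client import CollectorRegistry, Gauge, write_to_textfile
#   registry = CollectorRegistry()
#   g = Gauge('batch_job_last_success_unixtime', 'Last success', registry=registry)
#   g.set_to_current_time()
#   write_to_textfile('/var/lib/node_exporter/textfile/batch_job.prom', registry)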
def _make_handler(
url: str,
method: str,
timeout: Optional[float],
headers: Sequence[Tuple[str, str]],
data: bytes,
base_handler: Union[BaseHandler, type],
) -> Callable[[], None]:
def handle() -> None:
request = Request(url, data=data)
request.get_method = lambda: method # type: ignore
for k, v in headers:
request.add_header(k, v)
resp = build_opener(base_handler).open(request, timeout=timeout)
if resp.code >= 400:
raise OSError(f"error talking to pushgateway: {resp.code} {resp.msg}")
return handle
def default_handler(
url: str,
method: str,
timeout: Optional[float],
headers: List[Tuple[str, str]],
data: bytes,
) -> Callable[[], None]:
"""Default handler that implements HTTP/HTTPS connections.
Used by the push_to_gateway functions. Can be re-used by other handlers."""
return _make_handler(url, method, timeout, headers, data, HTTPHandler)
def passthrough_redirect_handler(
url: str,
method: str,
timeout: Optional[float],
headers: List[Tuple[str, str]],
data: bytes,
) -> Callable[[], None]:
"""
Handler that automatically trusts redirect responses for all HTTP methods.
Augments standard HTTPRedirectHandler capability by permitting PUT requests,
preserving the method upon redirect, and passing through all headers and
data from the original request. Only use this handler if you control or
trust the source of redirect responses you encounter when making requests
via the Prometheus client. This handler will simply repeat the identical
request, including same method and data, to the new redirect URL."""
return _make_handler(url, method, timeout, headers, data, _PrometheusRedirectHandler)
def basic_auth_handler(
url: str,
method: str,
timeout: Optional[float],
headers: List[Tuple[str, str]],
data: bytes,
username: Optional[str] = None,
password: Optional[str] = None,
) -> Callable[[], None]:
"""Handler that implements HTTP/HTTPS connections with Basic Auth.
Sets auth headers using supplied 'username' and 'password', if set.
Used by the push_to_gateway functions. Can be re-used by other handlers."""
def handle():
"""Handler that implements HTTP Basic Auth.
"""
if username is not None and password is not None:
auth_value = f'{username}:{password}'.encode()
auth_token = base64.b64encode(auth_value)
auth_header = b'Basic ' + auth_token
headers.append(('Authorization', auth_header))
default_handler(url, method, timeout, headers, data)()
return handle
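# A minimal usage sketch: bind credentials with functools.partial and pass
# the result as the push handler (hypothetical values):
#
#   from functools import partial
#   push_to_gateway('localhost:9091', job='batch_job', registry=registry,
#                   handler=partial(basic_auth_handler,
#                                   username='user', password='pass'))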
def tls_auth_handler(
url: str,
method: str,
timeout: Optional[float],
headers: List[Tuple[str, str]],
data: bytes,
certfile: str,
keyfile: str,
cafile: Optional[str] = None,
protocol: int = ssl.PROTOCOL_TLS_CLIENT,
insecure_skip_verify: bool = False,
) -> Callable[[], None]:
"""Handler that implements an HTTPS connection with TLS Auth.
The default protocol (ssl.PROTOCOL_TLS_CLIENT) will also enable
ssl.CERT_REQUIRED and SSLContext.check_hostname by default. This can be
disabled by setting insecure_skip_verify to True.
    Both this handler and the TLS feature on pushgateway are experimental."""
context = ssl.SSLContext(protocol=protocol)
if cafile is not None:
context.load_verify_locations(cafile)
else:
context.load_default_certs()
if insecure_skip_verify:
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE
context.load_cert_chain(certfile=certfile, keyfile=keyfile)
handler = HTTPSHandler(context=context)
return _make_handler(url, method, timeout, headers, data, handler)
def push_to_gateway(
gateway: str,
job: str,
registry: CollectorRegistry,
grouping_key: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = 30,
handler: Callable = default_handler,
) -> None:
"""Push metrics to the given pushgateway.
`gateway` the url for your push gateway. Either of the form
'http://pushgateway.local', or 'pushgateway.local'.
Scheme defaults to 'http' if none is provided
`job` is the job label to be attached to all pushed metrics
`registry` is an instance of CollectorRegistry
`grouping_key` please see the pushgateway documentation for details.
Defaults to None
`timeout` is how long push will attempt to connect before giving up.
Defaults to 30s, can be set to None for no timeout.
`handler` is an optional function which can be provided to perform
requests to the 'gateway'.
Defaults to None, in which case an http or https request
will be carried out by a default handler.
If not None, the argument must be a function which accepts
the following arguments:
url, method, timeout, headers, and content
May be used to implement additional functionality not
supported by the built-in default handler (such as SSL
              client certificates, and HTTP authentication mechanisms).
'url' is the URL for the request, the 'gateway' argument
described earlier will form the basis of this URL.
'method' is the HTTP method which should be used when
carrying out the request.
'timeout' requests not successfully completed after this
many seconds should be aborted. If timeout is None, then
the handler should not set a timeout.
'headers' is a list of ("header-name","header-value") tuples
which must be passed to the pushgateway in the form of HTTP
request headers.
The function should raise an exception (e.g. IOError) on
failure.
'content' is the data which should be used to form the HTTP
Message Body.
This overwrites all metrics with the same job and grouping_key.
This uses the PUT HTTP method."""
_use_gateway('PUT', gateway, job, registry, grouping_key, timeout, handler)
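# A minimal usage sketch (the gateway address is an assumption):
#
#   from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
#   registry = CollectorRegistry()
#   g = Gauge('job_last_success_unixtime', 'Last success', registry=registry)
#   g.set_to_current_time()
#   push_to_gateway('localhost:9091', job='batch_job', registry=registry,
#                   grouping_key=instance_ip_grouping_key())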
def pushadd_to_gateway(
gateway: str,
job: str,
registry: Optional[CollectorRegistry],
grouping_key: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = 30,
handler: Callable = default_handler,
) -> None:
"""PushAdd metrics to the given pushgateway.
`gateway` the url for your push gateway. Either of the form
'http://pushgateway.local', or 'pushgateway.local'.
Scheme defaults to 'http' if none is provided
`job` is the job label to be attached to all pushed metrics
`registry` is an instance of CollectorRegistry
`grouping_key` please see the pushgateway documentation for details.
Defaults to None
`timeout` is how long push will attempt to connect before giving up.
Defaults to 30s, can be set to None for no timeout.
`handler` is an optional function which can be provided to perform
requests to the 'gateway'.
Defaults to None, in which case an http or https request
will be carried out by a default handler.
See the 'prometheus_client.push_to_gateway' documentation
for implementation requirements.
This replaces metrics with the same name, job and grouping_key.
This uses the POST HTTP method."""
_use_gateway('POST', gateway, job, registry, grouping_key, timeout, handler)
def delete_from_gateway(
gateway: str,
job: str,
grouping_key: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = 30,
handler: Callable = default_handler,
) -> None:
"""Delete metrics from the given pushgateway.
`gateway` the url for your push gateway. Either of the form
'http://pushgateway.local', or 'pushgateway.local'.
Scheme defaults to 'http' if none is provided
`job` is the job label to be attached to all pushed metrics
`grouping_key` please see the pushgateway documentation for details.
Defaults to None
`timeout` is how long delete will attempt to connect before giving up.
Defaults to 30s, can be set to None for no timeout.
`handler` is an optional function which can be provided to perform
requests to the 'gateway'.
Defaults to None, in which case an http or https request
will be carried out by a default handler.
See the 'prometheus_client.push_to_gateway' documentation
for implementation requirements.
This deletes metrics with the given job and grouping_key.
This uses the DELETE HTTP method."""
_use_gateway('DELETE', gateway, job, None, grouping_key, timeout, handler)
def _use_gateway(
method: str,
gateway: str,
job: str,
registry: Optional[CollectorRegistry],
grouping_key: Optional[Dict[str, Any]],
timeout: Optional[float],
handler: Callable,
) -> None:
gateway_url = urlparse(gateway)
# See https://bugs.python.org/issue27657 for details on urlparse in py>=3.7.6.
if not gateway_url.scheme or gateway_url.scheme not in ['http', 'https']:
gateway = f'http://{gateway}'
gateway = gateway.rstrip('/')
url = '{}/metrics/{}/{}'.format(gateway, *_escape_grouping_key("job", job))
data = b''
if method != 'DELETE':
if registry is None:
registry = REGISTRY
data = generate_latest(registry)
if grouping_key is None:
grouping_key = {}
url += ''.join(
'/{}/{}'.format(*_escape_grouping_key(str(k), str(v)))
for k, v in sorted(grouping_key.items()))
handler(
url=url, method=method, timeout=timeout,
headers=[('Content-Type', CONTENT_TYPE_LATEST)], data=data,
)()
def _escape_grouping_key(k, v):
if v == "":
# Per https://github.com/prometheus/pushgateway/pull/346.
return k + "@base64", "="
elif '/' in v:
# Added in Pushgateway 0.9.0.
return k + "@base64", base64.urlsafe_b64encode(v.encode("utf-8")).decode("utf-8")
else:
return k, quote_plus(v)
def instance_ip_grouping_key() -> Dict[str, Any]:
"""Grouping key with instance set to the IP Address of this host."""
with closing(socket.socket(socket.AF_INET, socket.SOCK_DGRAM)) as s:
if sys.platform == 'darwin':
            # On macOS, connecting to localhost does not yield the host's
            # LAN address, so connect to an arbitrary non-local address
            # instead (no packets are actually sent over a UDP socket).
            # Adapted from this StackOverflow answer:
            # https://stackoverflow.com/a/28950776
s.connect(('10.255.255.255', 1))
else:
s.connect(('localhost', 0))
return {'instance': s.getsockname()[0]}
from .asgi import make_asgi_app # noqa

View File

@@ -0,0 +1,45 @@
import gc
import platform
from typing import Iterable
from .metrics_core import CounterMetricFamily, Metric
from .registry import Collector, CollectorRegistry, REGISTRY
class GCCollector(Collector):
"""Collector for Garbage collection statistics."""
def __init__(self, registry: CollectorRegistry = REGISTRY):
if not hasattr(gc, 'get_stats') or platform.python_implementation() != 'CPython':
return
registry.register(self)
def collect(self) -> Iterable[Metric]:
collected = CounterMetricFamily(
'python_gc_objects_collected',
'Objects collected during gc',
labels=['generation'],
)
uncollectable = CounterMetricFamily(
'python_gc_objects_uncollectable',
'Uncollectable objects found during GC',
labels=['generation'],
)
collections = CounterMetricFamily(
'python_gc_collections',
'Number of times this generation was collected',
labels=['generation'],
)
for gen, stat in enumerate(gc.get_stats()):
generation = str(gen)
collected.add_metric([generation], value=stat['collected'])
uncollectable.add_metric([generation], value=stat['uncollectable'])
collections.add_metric([generation], value=stat['collections'])
return [collected, uncollectable, collections]
GC_COLLECTOR = GCCollector()
"""Default GCCollector in default Registry REGISTRY."""

View File

@@ -0,0 +1,753 @@
import os
from threading import Lock
import time
import types
from typing import (
Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple,
Type, TypeVar, Union,
)
import warnings
from . import values # retain this import style for testability
from .context_managers import ExceptionCounter, InprogressTracker, Timer
from .metrics_core import Metric
from .registry import Collector, CollectorRegistry, REGISTRY
from .samples import Exemplar, Sample
from .utils import floatToGoString, INF
from .validation import (
_validate_exemplar, _validate_labelnames, _validate_metric_name,
)
T = TypeVar('T', bound='MetricWrapperBase')
F = TypeVar("F", bound=Callable[..., Any])
def _build_full_name(metric_type, name, namespace, subsystem, unit):
if not name:
raise ValueError('Metric name should not be empty')
full_name = ''
if namespace:
full_name += namespace + '_'
if subsystem:
full_name += subsystem + '_'
full_name += name
if metric_type == 'counter' and full_name.endswith('_total'):
full_name = full_name[:-6] # Munge to OpenMetrics.
if unit and not full_name.endswith("_" + unit):
full_name += "_" + unit
if unit and metric_type in ('info', 'stateset'):
raise ValueError('Metric name is of a type that cannot have a unit: ' + full_name)
return full_name
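# A minimal worked sketch of the name munging above (hypothetical inputs):
#
#   _build_full_name('counter', 'requests_total', 'myapp', 'http', '')
#   -> 'myapp_http_requests'      # '_total' is stripped for counters
#   _build_full_name('gauge', 'temperature', '', '', 'celsius')
#   -> 'temperature_celsius'      # the unit is appended if missing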
def _get_use_created() -> bool:
return os.environ.get("PROMETHEUS_DISABLE_CREATED_SERIES", 'False').lower() not in ('true', '1', 't')
_use_created = _get_use_created()
def disable_created_metrics():
"""Disable exporting _created metrics on counters, histograms, and summaries."""
global _use_created
_use_created = False
def enable_created_metrics():
"""Enable exporting _created metrics on counters, histograms, and summaries."""
global _use_created
_use_created = True
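# A minimal sketch: the _created series can also be disabled without code
# changes via the environment variable checked above (read once at import):
#
#   PROMETHEUS_DISABLE_CREATED_SERIES=True python app.py
#
# or at runtime:
#
#   import prometheus_client
#   prometheus_client.disable_created_metrics()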
class MetricWrapperBase(Collector):
_type: Optional[str] = None
_reserved_labelnames: Sequence[str] = ()
def _is_observable(self):
# Whether this metric is observable, i.e.
# * a metric without label names and values, or
# * the child of a labelled metric.
return not self._labelnames or (self._labelnames and self._labelvalues)
def _raise_if_not_observable(self):
# Functions that mutate the state of the metric, for example incrementing
# a counter, will fail if the metric is not observable, because only if a
# metric is observable will the value be initialized.
if not self._is_observable():
raise ValueError('%s metric is missing label values' % str(self._type))
def _is_parent(self):
return self._labelnames and not self._labelvalues
def _get_metric(self):
return Metric(self._name, self._documentation, self._type, self._unit)
def describe(self) -> Iterable[Metric]:
return [self._get_metric()]
def collect(self) -> Iterable[Metric]:
metric = self._get_metric()
for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
return [metric]
def __str__(self) -> str:
return f"{self._type}:{self._name}"
def __repr__(self) -> str:
metric_type = type(self)
return f"{metric_type.__module__}.{metric_type.__name__}({self._name})"
def __init__(self: T,
name: str,
documentation: str,
labelnames: Iterable[str] = (),
namespace: str = '',
subsystem: str = '',
unit: str = '',
registry: Optional[CollectorRegistry] = REGISTRY,
_labelvalues: Optional[Sequence[str]] = None,
) -> None:
self._name = _build_full_name(self._type, name, namespace, subsystem, unit)
self._labelnames = _validate_labelnames(self, labelnames)
self._labelvalues = tuple(_labelvalues or ())
self._kwargs: Dict[str, Any] = {}
self._documentation = documentation
self._unit = unit
_validate_metric_name(self._name)
if self._is_parent():
# Prepare the fields needed for child metrics.
self._lock = Lock()
self._metrics: Dict[Sequence[str], T] = {}
if self._is_observable():
self._metric_init()
if not self._labelvalues:
# Register the multi-wrapper parent metric, or if a label-less metric, the whole shebang.
if registry:
registry.register(self)
def labels(self: T, *labelvalues: Any, **labelkwargs: Any) -> T:
"""Return the child for the given labelset.
All metrics can have labels, allowing grouping of related time series.
Taking a counter as an example:
from prometheus_client import Counter
c = Counter('my_requests_total', 'HTTP Failures', ['method', 'endpoint'])
c.labels('get', '/').inc()
c.labels('post', '/submit').inc()
Labels can also be provided as keyword arguments:
from prometheus_client import Counter
c = Counter('my_requests_total', 'HTTP Failures', ['method', 'endpoint'])
c.labels(method='get', endpoint='/').inc()
c.labels(method='post', endpoint='/submit').inc()
See the best practices on [naming](http://prometheus.io/docs/practices/naming/)
and [labels](http://prometheus.io/docs/practices/instrumentation/#use-labels).
"""
if not self._labelnames:
raise ValueError('No label names were set when constructing %s' % self)
if self._labelvalues:
raise ValueError('{} already has labels set ({}); can not chain calls to .labels()'.format(
self,
dict(zip(self._labelnames, self._labelvalues))
))
if labelvalues and labelkwargs:
raise ValueError("Can't pass both *args and **kwargs")
if labelkwargs:
if sorted(labelkwargs) != sorted(self._labelnames):
raise ValueError('Incorrect label names')
labelvalues = tuple(str(labelkwargs[l]) for l in self._labelnames)
else:
if len(labelvalues) != len(self._labelnames):
raise ValueError('Incorrect label count')
labelvalues = tuple(str(l) for l in labelvalues)
with self._lock:
if labelvalues not in self._metrics:
self._metrics[labelvalues] = self.__class__(
self._name,
documentation=self._documentation,
labelnames=self._labelnames,
unit=self._unit,
_labelvalues=labelvalues,
**self._kwargs
)
return self._metrics[labelvalues]
    def remove(self, *labelvalues: Any) -> None:
        """Remove the given labelset from the metric."""
        if 'prometheus_multiproc_dir' in os.environ or 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
            warnings.warn(
                "Removal of labels has not been implemented in multi-process mode yet.",
                UserWarning)
        if not self._labelnames:
            raise ValueError('No label names were set when constructing %s' % self)
if len(labelvalues) != len(self._labelnames):
raise ValueError('Incorrect label count (expected %d, got %s)' % (len(self._labelnames), labelvalues))
labelvalues = tuple(str(l) for l in labelvalues)
with self._lock:
if labelvalues in self._metrics:
del self._metrics[labelvalues]
def clear(self) -> None:
"""Remove all labelsets from the metric"""
if 'prometheus_multiproc_dir' in os.environ or 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
warnings.warn(
"Clearing labels has not been implemented in multi-process mode yet",
UserWarning)
with self._lock:
self._metrics = {}
def _samples(self) -> Iterable[Sample]:
if self._is_parent():
return self._multi_samples()
else:
return self._child_samples()
def _multi_samples(self) -> Iterable[Sample]:
with self._lock:
metrics = self._metrics.copy()
for labels, metric in metrics.items():
series_labels = list(zip(self._labelnames, labels))
for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)
def _child_samples(self) -> Iterable[Sample]: # pragma: no cover
raise NotImplementedError('_child_samples() must be implemented by %r' % self)
def _metric_init(self): # pragma: no cover
"""
Initialize the metric object as a child, i.e. when it has labels (if any) set.
This is factored as a separate function to allow for deferred initialization.
"""
raise NotImplementedError('_metric_init() must be implemented by %r' % self)
class Counter(MetricWrapperBase):
"""A Counter tracks counts of events or running totals.
Example use cases for Counters:
- Number of requests processed
- Number of items that were inserted into a queue
- Total amount of data that a system has processed
Counters can only go up (and be reset when the process restarts). If your use case can go down,
you should use a Gauge instead.
An example for a Counter:
from prometheus_client import Counter
c = Counter('my_failures_total', 'Description of counter')
c.inc() # Increment by 1
c.inc(1.6) # Increment by given value
There are utilities to count exceptions raised:
@c.count_exceptions()
def f():
pass
with c.count_exceptions():
pass
# Count only one type of exception
with c.count_exceptions(ValueError):
pass
You can also reset the counter to zero in case your logical "process" restarts
without restarting the actual python process.
c.reset()
"""
_type = 'counter'
def _metric_init(self) -> None:
self._value = values.ValueClass(self._type, self._name, self._name + '_total', self._labelnames,
self._labelvalues, self._documentation)
self._created = time.time()
def inc(self, amount: float = 1, exemplar: Optional[Dict[str, str]] = None) -> None:
"""Increment counter by the given amount."""
self._raise_if_not_observable()
if amount < 0:
raise ValueError('Counters can only be incremented by non-negative amounts.')
self._value.inc(amount)
if exemplar:
_validate_exemplar(exemplar)
self._value.set_exemplar(Exemplar(exemplar, amount, time.time()))
def reset(self) -> None:
"""Reset the counter to zero. Use this when a logical process restarts without restarting the actual python process."""
self._value.set(0)
self._created = time.time()
def count_exceptions(self, exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]] = Exception) -> ExceptionCounter:
"""Count exceptions in a block of code or function.
Can be used as a function decorator or context manager.
Increments the counter when an exception of the given
type is raised up out of the code.
"""
self._raise_if_not_observable()
return ExceptionCounter(self, exception)
def _child_samples(self) -> Iterable[Sample]:
sample = Sample('_total', {}, self._value.get(), None, self._value.get_exemplar())
if _use_created:
return (
sample,
Sample('_created', {}, self._created, None, None)
)
return (sample,)
class Gauge(MetricWrapperBase):
"""Gauge metric, to report instantaneous values.
Examples of Gauges include:
- Inprogress requests
- Number of items in a queue
- Free memory
- Total memory
- Temperature
Gauges can go both up and down.
from prometheus_client import Gauge
g = Gauge('my_inprogress_requests', 'Description of gauge')
g.inc() # Increment by 1
g.dec(10) # Decrement by given value
g.set(4.2) # Set to a given value
There are utilities for common use cases:
g.set_to_current_time() # Set to current unixtime
# Increment when entered, decrement when exited.
@g.track_inprogress()
def f():
pass
with g.track_inprogress():
pass
A Gauge can also take its value from a callback:
d = Gauge('data_objects', 'Number of objects')
my_dict = {}
d.set_function(lambda: len(my_dict))
"""
_type = 'gauge'
_MULTIPROC_MODES = frozenset(('all', 'liveall', 'min', 'livemin', 'max', 'livemax', 'sum', 'livesum', 'mostrecent', 'livemostrecent'))
_MOST_RECENT_MODES = frozenset(('mostrecent', 'livemostrecent'))
def __init__(self,
name: str,
documentation: str,
labelnames: Iterable[str] = (),
namespace: str = '',
subsystem: str = '',
unit: str = '',
registry: Optional[CollectorRegistry] = REGISTRY,
_labelvalues: Optional[Sequence[str]] = None,
multiprocess_mode: Literal['all', 'liveall', 'min', 'livemin', 'max', 'livemax', 'sum', 'livesum', 'mostrecent', 'livemostrecent'] = 'all',
):
self._multiprocess_mode = multiprocess_mode
if multiprocess_mode not in self._MULTIPROC_MODES:
raise ValueError('Invalid multiprocess mode: ' + multiprocess_mode)
super().__init__(
name=name,
documentation=documentation,
labelnames=labelnames,
namespace=namespace,
subsystem=subsystem,
unit=unit,
registry=registry,
_labelvalues=_labelvalues,
)
self._kwargs['multiprocess_mode'] = self._multiprocess_mode
self._is_most_recent = self._multiprocess_mode in self._MOST_RECENT_MODES
def _metric_init(self) -> None:
self._value = values.ValueClass(
self._type, self._name, self._name, self._labelnames, self._labelvalues,
self._documentation, multiprocess_mode=self._multiprocess_mode
)
def inc(self, amount: float = 1) -> None:
"""Increment gauge by the given amount."""
if self._is_most_recent:
raise RuntimeError("inc must not be used with the mostrecent mode")
self._raise_if_not_observable()
self._value.inc(amount)
def dec(self, amount: float = 1) -> None:
"""Decrement gauge by the given amount."""
if self._is_most_recent:
raise RuntimeError("dec must not be used with the mostrecent mode")
self._raise_if_not_observable()
self._value.inc(-amount)
def set(self, value: float) -> None:
"""Set gauge to the given value."""
self._raise_if_not_observable()
if self._is_most_recent:
self._value.set(float(value), timestamp=time.time())
else:
self._value.set(float(value))
def set_to_current_time(self) -> None:
"""Set gauge to the current unixtime."""
self.set(time.time())
def track_inprogress(self) -> InprogressTracker:
"""Track inprogress blocks of code or functions.
Can be used as a function decorator or context manager.
Increments the gauge when the code is entered,
and decrements when it is exited.
"""
self._raise_if_not_observable()
return InprogressTracker(self)
def time(self) -> Timer:
"""Time a block of code or function, and set the duration in seconds.
Can be used as a function decorator or context manager.
"""
return Timer(self, 'set')
def set_function(self, f: Callable[[], float]) -> None:
"""Call the provided function to return the Gauge value.
The function must return a float, and may be called from
multiple threads. All other methods of the Gauge become NOOPs.
"""
self._raise_if_not_observable()
def samples(_: Gauge) -> Iterable[Sample]:
return (Sample('', {}, float(f()), None, None),)
self._child_samples = types.MethodType(samples, self) # type: ignore
def _child_samples(self) -> Iterable[Sample]:
return (Sample('', {}, self._value.get(), None, None),)
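# A short sketch of the Gauge callback form (set_function); the metric name and
# data structure are illustrative, and a fresh registry is assumed.
def _example_gauge_callback():
    from prometheus_client import CollectorRegistry, Gauge

    registry = CollectorRegistry()
    pending = ['a', 'b']
    depth = Gauge('queue_depth', 'Items waiting', registry=registry)
    depth.set_function(lambda: len(pending))

    # The callback is invoked at collection time, not when set_function is called.
    assert registry.get_sample_value('queue_depth') == 2.0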
class Summary(MetricWrapperBase):
"""A Summary tracks the size and number of events.
Example use cases for Summaries:
- Response latency
- Request size
Example for a Summary:
from prometheus_client import Summary
s = Summary('request_size_bytes', 'Request size (bytes)')
s.observe(512) # Observe 512 (bytes)
Example for a Summary using time:
from prometheus_client import Summary
REQUEST_TIME = Summary('response_latency_seconds', 'Response latency (seconds)')
@REQUEST_TIME.time()
def create_response(request):
'''A dummy function'''
time.sleep(1)
Example for using the same Summary object as a context manager:
with REQUEST_TIME.time():
pass # Logic to be timed
"""
_type = 'summary'
_reserved_labelnames = ['quantile']
def _metric_init(self) -> None:
self._count = values.ValueClass(self._type, self._name, self._name + '_count', self._labelnames,
self._labelvalues, self._documentation)
self._sum = values.ValueClass(self._type, self._name, self._name + '_sum', self._labelnames, self._labelvalues, self._documentation)
self._created = time.time()
def observe(self, amount: float) -> None:
"""Observe the given amount.
The amount is usually positive or zero. Negative values are
accepted but prevent current versions of Prometheus from
properly detecting counter resets in the sum of
observations. See
https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations
for details.
"""
self._raise_if_not_observable()
self._count.inc(1)
self._sum.inc(amount)
def time(self) -> Timer:
"""Time a block of code or function, and observe the duration in seconds.
Can be used as a function decorator or context manager.
"""
return Timer(self, 'observe')
def _child_samples(self) -> Iterable[Sample]:
samples = [
Sample('_count', {}, self._count.get(), None, None),
Sample('_sum', {}, self._sum.get(), None, None),
]
if _use_created:
samples.append(Sample('_created', {}, self._created, None, None))
return tuple(samples)
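# A labelled-Summary sketch: each label combination gets its own _count/_sum
# pair. Metric and label names here are illustrative.
def _example_summary_labels():
    from prometheus_client import CollectorRegistry, Summary

    registry = CollectorRegistry()
    latency = Summary('request_latency_seconds', 'Request latency',
                      ['method'], registry=registry)
    latency.labels('GET').observe(0.23)
    latency.labels(method='POST').observe(0.81)

    assert registry.get_sample_value(
        'request_latency_seconds_count', {'method': 'GET'}) == 1.0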
class Histogram(MetricWrapperBase):
"""A Histogram tracks the size and number of events in buckets.
You can use Histograms for aggregatable calculation of quantiles.
Example use cases:
- Response latency
- Request size
Example for a Histogram:
from prometheus_client import Histogram
h = Histogram('request_size_bytes', 'Request size (bytes)')
h.observe(512) # Observe 512 (bytes)
Example for a Histogram using time:
from prometheus_client import Histogram
REQUEST_TIME = Histogram('response_latency_seconds', 'Response latency (seconds)')
@REQUEST_TIME.time()
def create_response(request):
'''A dummy function'''
time.sleep(1)
Example of using the same Histogram object as a context manager:
with REQUEST_TIME.time():
pass # Logic to be timed
The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds.
They can be overridden by passing `buckets` keyword argument to `Histogram`.
"""
_type = 'histogram'
_reserved_labelnames = ['le']
DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF)
def __init__(self,
name: str,
documentation: str,
labelnames: Iterable[str] = (),
namespace: str = '',
subsystem: str = '',
unit: str = '',
registry: Optional[CollectorRegistry] = REGISTRY,
_labelvalues: Optional[Sequence[str]] = None,
buckets: Sequence[Union[float, str]] = DEFAULT_BUCKETS,
):
self._prepare_buckets(buckets)
super().__init__(
name=name,
documentation=documentation,
labelnames=labelnames,
namespace=namespace,
subsystem=subsystem,
unit=unit,
registry=registry,
_labelvalues=_labelvalues,
)
self._kwargs['buckets'] = buckets
def _prepare_buckets(self, source_buckets: Sequence[Union[float, str]]) -> None:
buckets = [float(b) for b in source_buckets]
if buckets != sorted(buckets):
# This is probably an error on the part of the user,
# so raise rather than sorting for them.
raise ValueError('Buckets not in sorted order')
if buckets and buckets[-1] != INF:
buckets.append(INF)
if len(buckets) < 2:
raise ValueError('Must have at least two buckets')
self._upper_bounds = buckets
def _metric_init(self) -> None:
self._buckets: List[values.ValueClass] = []
self._created = time.time()
bucket_labelnames = self._labelnames + ('le',)
self._sum = values.ValueClass(self._type, self._name, self._name + '_sum', self._labelnames, self._labelvalues, self._documentation)
for b in self._upper_bounds:
self._buckets.append(values.ValueClass(
self._type,
self._name,
self._name + '_bucket',
bucket_labelnames,
self._labelvalues + (floatToGoString(b),),
self._documentation)
)
def observe(self, amount: float, exemplar: Optional[Dict[str, str]] = None) -> None:
"""Observe the given amount.
The amount is usually positive or zero. Negative values are
accepted but prevent current versions of Prometheus from
properly detecting counter resets in the sum of
observations. See
https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations
for details.
"""
self._raise_if_not_observable()
self._sum.inc(amount)
for i, bound in enumerate(self._upper_bounds):
if amount <= bound:
self._buckets[i].inc(1)
if exemplar:
_validate_exemplar(exemplar)
self._buckets[i].set_exemplar(Exemplar(exemplar, amount, time.time()))
break
def time(self) -> Timer:
"""Time a block of code or function, and observe the duration in seconds.
Can be used as a function decorator or context manager.
"""
return Timer(self, 'observe')
def _child_samples(self) -> Iterable[Sample]:
samples = []
acc = 0.0
for i, bound in enumerate(self._upper_bounds):
acc += self._buckets[i].get()
samples.append(Sample('_bucket', {'le': floatToGoString(bound)}, acc, None, self._buckets[i].get_exemplar()))
samples.append(Sample('_count', {}, acc, None, None))
if self._upper_bounds[0] >= 0:
samples.append(Sample('_sum', {}, self._sum.get(), None, None))
if _use_created:
samples.append(Sample('_created', {}, self._created, None, None))
return tuple(samples)
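# A sketch of custom buckets and exemplars. The bucket bounds and trace label
# are illustrative; exemplars only appear in the OpenMetrics exposition format.
def _example_histogram_buckets():
    from prometheus_client import CollectorRegistry, Histogram

    registry = CollectorRegistry()
    sizes = Histogram('payload_bytes', 'Payload size', registry=registry,
                      buckets=(100, 1000, 10000))  # +Inf is appended automatically
    sizes.observe(512, exemplar={'trace_id': 'abc123'})

    # Buckets are cumulative: 512 lands in le="1000.0" and every wider bucket.
    assert registry.get_sample_value('payload_bytes_bucket', {'le': '1000.0'}) == 1.0
    assert registry.get_sample_value('payload_bytes_bucket', {'le': '+Inf'}) == 1.0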
class Info(MetricWrapperBase):
"""Info metric, key-value pairs.
Examples of Info include:
- Build information
- Version information
- Potential target metadata
Example usage:
from prometheus_client import Info
i = Info('my_build', 'Description of info')
i.info({'version': '1.2.3', 'buildhost': 'foo@bar'})
Info metrics do not work in multiprocess mode.
"""
_type = 'info'
def _metric_init(self):
self._labelname_set = set(self._labelnames)
self._lock = Lock()
self._value = {}
def info(self, val: Dict[str, str]) -> None:
"""Set info metric."""
if self._labelname_set.intersection(val.keys()):
raise ValueError('Overlapping labels for Info metric, metric: {} child: {}'.format(
self._labelnames, val))
if any(i is None for i in val.values()):
raise ValueError('Label value cannot be None')
with self._lock:
self._value = dict(val)
def _child_samples(self) -> Iterable[Sample]:
with self._lock:
return (Sample('_info', self._value, 1.0, None, None),)
class Enum(MetricWrapperBase):
"""Enum metric, which of a set of states is true.
Example usage:
from prometheus_client import Enum
e = Enum('task_state', 'Description of enum',
states=['starting', 'running', 'stopped'])
e.state('running')
The first listed state will be the default.
Enum metrics do not work in multiprocess mode.
"""
_type = 'stateset'
def __init__(self,
name: str,
documentation: str,
labelnames: Sequence[str] = (),
namespace: str = '',
subsystem: str = '',
unit: str = '',
registry: Optional[CollectorRegistry] = REGISTRY,
_labelvalues: Optional[Sequence[str]] = None,
states: Optional[Sequence[str]] = None,
):
super().__init__(
name=name,
documentation=documentation,
labelnames=labelnames,
namespace=namespace,
subsystem=subsystem,
unit=unit,
registry=registry,
_labelvalues=_labelvalues,
)
if name in labelnames:
raise ValueError(f'Overlapping labels for Enum metric: {name}')
if not states:
raise ValueError(f'No states provided for Enum metric: {name}')
self._kwargs['states'] = self._states = states
def _metric_init(self) -> None:
self._value = 0
self._lock = Lock()
def state(self, state: str) -> None:
"""Set enum metric state."""
self._raise_if_not_observable()
with self._lock:
self._value = self._states.index(state)
def _child_samples(self) -> Iterable[Sample]:
with self._lock:
return [
Sample('', {self._name: s}, 1 if i == self._value else 0, None, None)
for i, s
in enumerate(self._states)
]
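# A sketch of how a stateset is exposed: one sample per state, with exactly one
# carrying the value 1. The output shown in the comment is approximate.
def _example_enum_exposition():
    from prometheus_client import CollectorRegistry, Enum, generate_latest

    registry = CollectorRegistry()
    state = Enum('task_state', 'Task state', registry=registry,
                 states=['starting', 'running', 'stopped'])
    state.state('running')

    # Yields lines like: task_state{task_state="running"} 1.0
    print(generate_latest(registry).decode())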

View File

@@ -0,0 +1,415 @@
from typing import Dict, List, Optional, Sequence, Tuple, Union
from .samples import Exemplar, NativeHistogram, Sample, Timestamp
from .validation import _validate_metric_name
METRIC_TYPES = (
'counter', 'gauge', 'summary', 'histogram',
'gaugehistogram', 'unknown', 'info', 'stateset',
)
class Metric:
"""A single metric family and its samples.
This is intended only for internal use by the instrumentation client.
Custom collectors should use GaugeMetricFamily, CounterMetricFamily
and SummaryMetricFamily instead.
"""
def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
if unit and not name.endswith("_" + unit):
name += "_" + unit
_validate_metric_name(name)
self.name: str = name
self.documentation: str = documentation
self.unit: str = unit
if typ == 'untyped':
typ = 'unknown'
if typ not in METRIC_TYPES:
raise ValueError('Invalid metric type: ' + typ)
self.type: str = typ
self.samples: List[Sample] = []
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
"""Add a sample to the metric.
Internal-only, do not use."""
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))
def __eq__(self, other: object) -> bool:
return (isinstance(other, Metric)
and self.name == other.name
and self.documentation == other.documentation
and self.type == other.type
and self.unit == other.unit
and self.samples == other.samples)
def __repr__(self) -> str:
return "Metric({}, {}, {}, {}, {})".format(
self.name,
self.documentation,
self.type,
self.unit,
self.samples,
)
def _restricted_metric(self, names):
"""Build a snapshot of a metric with samples restricted to a given set of names."""
samples = [s for s in self.samples if s[0] in names]
if samples:
m = Metric(self.name, self.documentation, self.type)
m.samples = samples
return m
return None
class UnknownMetricFamily(Metric):
"""A single unknown metric and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
unit: str = '',
):
Metric.__init__(self, name, documentation, 'unknown', unit)
if labels is not None and value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value)
def add_metric(self, labels: Sequence[str], value: float, timestamp: Optional[Union[Timestamp, float]] = None) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
value: The value of the metric.
"""
self.samples.append(Sample(self.name, dict(zip(self._labelnames, labels)), value, timestamp))
# For backward compatibility.
UntypedMetricFamily = UnknownMetricFamily
class CounterMetricFamily(Metric):
"""A single counter and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
created: Optional[float] = None,
unit: str = '',
exemplar: Optional[Exemplar] = None,
):
# Glue code for pre-OpenMetrics metrics.
if name.endswith('_total'):
name = name[:-6]
Metric.__init__(self, name, documentation, 'counter', unit)
if labels is not None and value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value, created, exemplar=exemplar)
def add_metric(self,
labels: Sequence[str],
value: float,
created: Optional[float] = None,
timestamp: Optional[Union[Timestamp, float]] = None,
exemplar: Optional[Exemplar] = None,
) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
value: The value of the metric
created: Optional unix timestamp the child was created at.
"""
self.samples.append(Sample(self.name + '_total', dict(zip(self._labelnames, labels)), value, timestamp, exemplar))
if created is not None:
self.samples.append(Sample(self.name + '_created', dict(zip(self._labelnames, labels)), created, timestamp))
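# A hedged sketch of the intended use: a custom collector that builds a
# CounterMetricFamily at scrape time. The names and counts are illustrative.
def _example_custom_counter_collector():
    from prometheus_client.core import REGISTRY, CounterMetricFamily

    class RequestStatsCollector:
        """Reads counts from some external source on every scrape."""
        def collect(self):
            c = CounterMetricFamily('http_requests', 'Requests served',
                                    labels=['handler'])
            c.add_metric(['index'], 1024)
            c.add_metric(['search'], 42)
            yield c

    REGISTRY.register(RequestStatsCollector())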
class GaugeMetricFamily(Metric):
"""A single gauge and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
unit: str = '',
):
Metric.__init__(self, name, documentation, 'gauge', unit)
if labels is not None and value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value)
def add_metric(self, labels: Sequence[str], value: float, timestamp: Optional[Union[Timestamp, float]] = None) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
value: A float
"""
self.samples.append(Sample(self.name, dict(zip(self._labelnames, labels)), value, timestamp))
class SummaryMetricFamily(Metric):
"""A single summary and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
count_value: Optional[int] = None,
sum_value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
unit: str = '',
):
Metric.__init__(self, name, documentation, 'summary', unit)
if (sum_value is None) != (count_value is None):
raise ValueError('count_value and sum_value must be provided together.')
if labels is not None and count_value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
        # The "and" clause is needed only for typing; the ValueError above already guarantees both are set together.
if count_value is not None and sum_value is not None:
self.add_metric([], count_value, sum_value)
def add_metric(self,
labels: Sequence[str],
count_value: int,
sum_value: float,
timestamp:
Optional[Union[float, Timestamp]] = None
) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
count_value: The count value of the metric.
sum_value: The sum value of the metric.
"""
self.samples.append(Sample(self.name + '_count', dict(zip(self._labelnames, labels)), count_value, timestamp))
self.samples.append(Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))
class HistogramMetricFamily(Metric):
"""A single histogram and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
buckets: Optional[Sequence[Union[Tuple[str, float], Tuple[str, float, Exemplar]]]] = None,
sum_value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
unit: str = '',
):
Metric.__init__(self, name, documentation, 'histogram', unit)
if sum_value is not None and buckets is None:
raise ValueError('sum value cannot be provided without buckets.')
if labels is not None and buckets is not None:
raise ValueError('Can only specify at most one of buckets and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if buckets is not None:
self.add_metric([], buckets, sum_value)
def add_metric(self,
labels: Sequence[str],
buckets: Sequence[Union[Tuple[str, float], Tuple[str, float, Exemplar]]],
sum_value: Optional[float],
timestamp: Optional[Union[Timestamp, float]] = None) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
          buckets: A list of pairs of bucket name and value,
              or triples of bucket name, value, and exemplar.
The buckets must be sorted, and +Inf present.
sum_value: The sum value of the metric.
"""
for b in buckets:
bucket, value = b[:2]
exemplar = None
if len(b) == 3:
exemplar = b[2] # type: ignore
self.samples.append(Sample(
self.name + '_bucket',
dict(list(zip(self._labelnames, labels)) + [('le', bucket)]),
value,
timestamp,
exemplar,
))
        # Don't include sum (and thus count) if there are negative buckets.
if float(buckets[0][0]) >= 0 and sum_value is not None:
# +Inf is last and provides the count value.
self.samples.append(
Sample(self.name + '_count', dict(zip(self._labelnames, labels)), buckets[-1][1], timestamp))
self.samples.append(
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))
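# A sketch of building a histogram family by hand; names and values are
# illustrative. Buckets must be cumulative, sorted, and end with +Inf, whose
# value doubles as the _count.
def _example_histogram_family():
    from prometheus_client.core import HistogramMetricFamily

    h = HistogramMetricFamily('request_seconds', 'Request latency',
                              labels=['handler'])
    h.add_metric(['index'],
                 buckets=[('0.1', 7.0), ('1.0', 10.0), ('+Inf', 12.0)],
                 sum_value=8.5)
    return h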
class GaugeHistogramMetricFamily(Metric):
"""A single gauge histogram and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
buckets: Optional[Sequence[Tuple[str, float]]] = None,
gsum_value: Optional[float] = None,
labels: Optional[Sequence[str]] = None,
unit: str = '',
):
Metric.__init__(self, name, documentation, 'gaugehistogram', unit)
if labels is not None and buckets is not None:
raise ValueError('Can only specify at most one of buckets and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if buckets is not None:
self.add_metric([], buckets, gsum_value)
def add_metric(self,
labels: Sequence[str],
buckets: Sequence[Tuple[str, float]],
gsum_value: Optional[float],
timestamp: Optional[Union[float, Timestamp]] = None,
) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
buckets: A list of pairs of bucket names and values.
The buckets must be sorted, and +Inf present.
gsum_value: The sum value of the metric.
"""
for bucket, value in buckets:
self.samples.append(Sample(
self.name + '_bucket',
dict(list(zip(self._labelnames, labels)) + [('le', bucket)]),
value, timestamp))
# +Inf is last and provides the count value.
self.samples.extend([
Sample(self.name + '_gcount', dict(zip(self._labelnames, labels)), buckets[-1][1], timestamp),
# TODO: Handle None gsum_value correctly. Currently a None will fail exposition but is allowed here.
Sample(self.name + '_gsum', dict(zip(self._labelnames, labels)), gsum_value, timestamp), # type: ignore
])
class InfoMetricFamily(Metric):
"""A single info and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
value: Optional[Dict[str, str]] = None,
labels: Optional[Sequence[str]] = None,
):
Metric.__init__(self, name, documentation, 'info')
if labels is not None and value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value)
def add_metric(self,
labels: Sequence[str],
value: Dict[str, str],
timestamp: Optional[Union[Timestamp, float]] = None,
) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
value: A dict of labels
"""
self.samples.append(Sample(
self.name + '_info',
dict(dict(zip(self._labelnames, labels)), **value),
1,
timestamp,
))
class StateSetMetricFamily(Metric):
"""A single stateset and its samples.
For use by custom collectors.
"""
def __init__(self,
name: str,
documentation: str,
value: Optional[Dict[str, bool]] = None,
labels: Optional[Sequence[str]] = None,
):
Metric.__init__(self, name, documentation, 'stateset')
if labels is not None and value is not None:
raise ValueError('Can only specify at most one of value and labels.')
if labels is None:
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value)
def add_metric(self,
labels: Sequence[str],
value: Dict[str, bool],
timestamp: Optional[Union[Timestamp, float]] = None,
) -> None:
"""Add a metric to the metric family.
Args:
labels: A list of label values
value: A dict of string state names to booleans
"""
labels = tuple(labels)
for state, enabled in sorted(value.items()):
v = (1 if enabled else 0)
self.samples.append(Sample(
self.name,
dict(zip(self._labelnames + (self.name,), labels + (state,))),
v,
timestamp,
))

View File

@@ -0,0 +1,145 @@
import json
import mmap
import os
import struct
from typing import List
_INITIAL_MMAP_SIZE = 1 << 16
_pack_integer_func = struct.Struct(b'i').pack
_pack_two_doubles_func = struct.Struct(b'dd').pack
_unpack_integer = struct.Struct(b'i').unpack_from
_unpack_two_doubles = struct.Struct(b'dd').unpack_from
# struct.pack_into has atomicity issues because it will temporarily write 0 into
# the mmap, resulting in spurious reads of 0 under heavy concurrent writes.
# Using direct slice assignment avoids this.
def _pack_two_doubles(data, pos, value, timestamp):
data[pos:pos + 16] = _pack_two_doubles_func(value, timestamp)
def _pack_integer(data, pos, value):
data[pos:pos + 4] = _pack_integer_func(value)
def _read_all_values(data, used=0):
"""Yield (key, value, timestamp, pos). No locking is performed."""
if used <= 0:
        # If no valid `used` value is passed in, read it from the file.
used = _unpack_integer(data, 0)[0]
pos = 8
while pos < used:
encoded_len = _unpack_integer(data, pos)[0]
# check we are not reading beyond bounds
if encoded_len + pos > used:
raise RuntimeError('Read beyond file size detected, file is corrupted.')
pos += 4
encoded_key = data[pos:pos + encoded_len]
padded_len = encoded_len + (8 - (encoded_len + 4) % 8)
pos += padded_len
value, timestamp = _unpack_two_doubles(data, pos)
yield encoded_key.decode('utf-8'), value, timestamp, pos
pos += 16
class MmapedDict:
"""A dict of doubles, backed by an mmapped file.
The file starts with a 4 byte int, indicating how much of it is used.
Then 4 bytes of padding.
There's then a number of entries, consisting of a 4 byte int which is the
size of the next field, a utf-8 encoded string key, padding to a 8 byte
alignment, and then a 8 byte float which is the value and a 8 byte float
which is a UNIX timestamp in seconds.
Not thread safe.
"""
def __init__(self, filename, read_mode=False):
self._f = open(filename, 'rb' if read_mode else 'a+b')
self._fname = filename
capacity = os.fstat(self._f.fileno()).st_size
if capacity == 0:
self._f.truncate(_INITIAL_MMAP_SIZE)
capacity = _INITIAL_MMAP_SIZE
self._capacity = capacity
self._m = mmap.mmap(self._f.fileno(), self._capacity,
access=mmap.ACCESS_READ if read_mode else mmap.ACCESS_WRITE)
self._positions = {}
self._used = _unpack_integer(self._m, 0)[0]
if self._used == 0:
self._used = 8
_pack_integer(self._m, 0, self._used)
else:
if not read_mode:
for key, _, _, pos in self._read_all_values():
self._positions[key] = pos
@staticmethod
def read_all_values_from_file(filename):
with open(filename, 'rb') as infp:
# Read the first block of data, including the first 4 bytes which tell us
# how much of the file (which is preallocated to _INITIAL_MMAP_SIZE bytes) is occupied.
data = infp.read(mmap.PAGESIZE)
used = _unpack_integer(data, 0)[0]
if used > len(data): # Then read in the rest, if needed.
data += infp.read(used - len(data))
return _read_all_values(data, used)
def _init_value(self, key):
"""Initialize a value. Lock must be held by caller."""
encoded = key.encode('utf-8')
# Pad to be 8-byte aligned.
padded = encoded + (b' ' * (8 - (len(encoded) + 4) % 8))
value = struct.pack(f'i{len(padded)}sdd'.encode(), len(encoded), padded, 0.0, 0.0)
while self._used + len(value) > self._capacity:
self._capacity *= 2
self._f.truncate(self._capacity)
self._m = mmap.mmap(self._f.fileno(), self._capacity)
self._m[self._used:self._used + len(value)] = value
# Update how much space we've used.
self._used += len(value)
_pack_integer(self._m, 0, self._used)
self._positions[key] = self._used - 16
def _read_all_values(self):
"""Yield (key, value, pos). No locking is performed."""
return _read_all_values(data=self._m, used=self._used)
def read_all_values(self):
"""Yield (key, value, timestamp). No locking is performed."""
for k, v, ts, _ in self._read_all_values():
yield k, v, ts
def read_value(self, key):
if key not in self._positions:
self._init_value(key)
pos = self._positions[key]
return _unpack_two_doubles(self._m, pos)
def write_value(self, key, value, timestamp):
if key not in self._positions:
self._init_value(key)
pos = self._positions[key]
_pack_two_doubles(self._m, pos, value, timestamp)
def close(self):
if self._f:
self._m.close()
self._m = None
self._f.close()
self._f = None
def mmap_key(metric_name: str, name: str, labelnames: List[str], labelvalues: List[str], help_text: str) -> str:
"""Format a key for use in the mmap file."""
# ensure labels are in consistent order for identity
labels = dict(zip(labelnames, labelvalues))
return json.dumps([metric_name, name, labels, help_text], sort_keys=True)
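# A round-trip sketch of MmapedDict; the temporary file path is illustrative.
def _example_mmaped_dict_roundtrip():
    import os
    import tempfile

    path = os.path.join(tempfile.mkdtemp(), 'counter_0.db')
    d = MmapedDict(path)
    key = mmap_key('requests', 'requests_total', ['handler'], ['index'], 'Requests')
    d.write_value(key, 3.0, 0.0)
    assert d.read_value(key) == (3.0, 0.0)
    d.close()

    # Readers (e.g. the multiprocess collector) can scan without writing.
    for k, value, timestamp in MmapedDict(path, read_mode=True).read_all_values():
        print(k, value, timestamp)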

View File

@@ -0,0 +1,170 @@
from collections import defaultdict
import glob
import json
import os
import warnings
from .metrics import Gauge
from .metrics_core import Metric
from .mmap_dict import MmapedDict
from .samples import Sample
from .utils import floatToGoString
try: # Python3
FileNotFoundError
except NameError:  # Python 2
FileNotFoundError = IOError
class MultiProcessCollector:
"""Collector for files for multi-process mode."""
def __init__(self, registry, path=None):
if path is None:
# This deprecation warning can go away in a few releases when removing the compatibility
if 'prometheus_multiproc_dir' in os.environ and 'PROMETHEUS_MULTIPROC_DIR' not in os.environ:
os.environ['PROMETHEUS_MULTIPROC_DIR'] = os.environ['prometheus_multiproc_dir']
warnings.warn("prometheus_multiproc_dir variable has been deprecated in favor of the upper case naming PROMETHEUS_MULTIPROC_DIR", DeprecationWarning)
path = os.environ.get('PROMETHEUS_MULTIPROC_DIR')
if not path or not os.path.isdir(path):
raise ValueError('env PROMETHEUS_MULTIPROC_DIR is not set or not a directory')
self._path = path
if registry:
registry.register(self)
@staticmethod
def merge(files, accumulate=True):
"""Merge metrics from given mmap files.
By default, histograms are accumulated, as per prometheus wire format.
But if writing the merged data back to mmap files, use
accumulate=False to avoid compound accumulation.
"""
metrics = MultiProcessCollector._read_metrics(files)
return MultiProcessCollector._accumulate_metrics(metrics, accumulate)
@staticmethod
def _read_metrics(files):
metrics = {}
key_cache = {}
def _parse_key(key):
val = key_cache.get(key)
if not val:
metric_name, name, labels, help_text = json.loads(key)
labels_key = tuple(sorted(labels.items()))
val = key_cache[key] = (metric_name, name, labels, labels_key, help_text)
return val
for f in files:
parts = os.path.basename(f).split('_')
typ = parts[0]
try:
file_values = MmapedDict.read_all_values_from_file(f)
except FileNotFoundError:
if typ == 'gauge' and parts[1].startswith('live'):
# Files for 'live*' gauges can be deleted between the glob of collect
# and now (via a mark_process_dead call) so don't fail if
# the file is missing
continue
raise
for key, value, timestamp, _ in file_values:
metric_name, name, labels, labels_key, help_text = _parse_key(key)
metric = metrics.get(metric_name)
if metric is None:
metric = Metric(metric_name, help_text, typ)
metrics[metric_name] = metric
if typ == 'gauge':
pid = parts[2][:-3]
metric._multiprocess_mode = parts[1]
metric.add_sample(name, labels_key + (('pid', pid),), value, timestamp)
else:
                    # Duplicate samples and labels are resolved in the accumulation pass below.
metric.add_sample(name, labels_key, value)
return metrics
@staticmethod
def _accumulate_metrics(metrics, accumulate):
for metric in metrics.values():
samples = defaultdict(float)
sample_timestamps = defaultdict(float)
buckets = defaultdict(lambda: defaultdict(float))
samples_setdefault = samples.setdefault
for s in metric.samples:
name, labels, value, timestamp, exemplar, native_histogram_value = s
if metric.type == 'gauge':
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
if metric._multiprocess_mode in ('min', 'livemin'):
current = samples_setdefault(without_pid_key, value)
if value < current:
samples[without_pid_key] = value
elif metric._multiprocess_mode in ('max', 'livemax'):
current = samples_setdefault(without_pid_key, value)
if value > current:
samples[without_pid_key] = value
elif metric._multiprocess_mode in ('sum', 'livesum'):
samples[without_pid_key] += value
elif metric._multiprocess_mode in ('mostrecent', 'livemostrecent'):
current_timestamp = sample_timestamps[without_pid_key]
timestamp = float(timestamp or 0)
if current_timestamp < timestamp:
samples[without_pid_key] = value
sample_timestamps[without_pid_key] = timestamp
else: # all/liveall
samples[(name, labels)] = value
elif metric.type == 'histogram':
# A for loop with early exit is faster than a genexpr
# or a listcomp that ends up building unnecessary things
for l in labels:
if l[0] == 'le':
bucket_value = float(l[1])
# _bucket
without_le = tuple(l for l in labels if l[0] != 'le')
buckets[without_le][bucket_value] += value
break
else: # did not find the `le` key
# _sum/_count
samples[(name, labels)] += value
else:
# Counter and Summary.
samples[(name, labels)] += value
# Accumulate bucket values.
if metric.type == 'histogram':
for labels, values in buckets.items():
acc = 0.0
for bucket, value in sorted(values.items()):
sample_key = (
metric.name + '_bucket',
labels + (('le', floatToGoString(bucket)),),
)
if accumulate:
acc += value
samples[sample_key] = acc
else:
samples[sample_key] = value
if accumulate:
samples[(metric.name + '_count', labels)] = acc
# Convert to correct sample format.
metric.samples = [Sample(name_, dict(labels), value) for (name_, labels), value in samples.items()]
return metrics.values()
def collect(self):
files = glob.glob(os.path.join(self._path, '*.db'))
return self.merge(files, accumulate=True)
_LIVE_GAUGE_MULTIPROCESS_MODES = {m for m in Gauge._MULTIPROC_MODES if m.startswith('live')}
def mark_process_dead(pid, path=None):
"""Do bookkeeping for when one process dies in a multi-process setup."""
if path is None:
path = os.environ.get('PROMETHEUS_MULTIPROC_DIR', os.environ.get('prometheus_multiproc_dir'))
for mode in _LIVE_GAUGE_MULTIPROCESS_MODES:
for f in glob.glob(os.path.join(path, f'gauge_{mode}_{pid}.db')):
os.remove(f)
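# The usual scrape-side wiring for multiprocess mode, as a sketch.
# PROMETHEUS_MULTIPROC_DIR must point at an existing directory writable by all
# worker processes before they import prometheus_client.
def _example_multiprocess_scrape():
    from prometheus_client import CollectorRegistry, generate_latest

    registry = CollectorRegistry()
    MultiProcessCollector(registry)  # reads the *.db files under the directory
    return generate_latest(registry)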

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python
from ..utils import floatToGoString
from ..validation import (
_is_valid_legacy_labelname, _is_valid_legacy_metric_name,
)
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
"""Content type of the latest OpenMetrics text format"""
def _is_valid_exemplar_metric(metric, sample):
if metric.type == 'counter' and sample.name.endswith('_total'):
return True
    if metric.type == 'gaugehistogram' and sample.name.endswith('_bucket'):
        return True
    if metric.type == 'histogram' and (sample.name.endswith('_bucket') or sample.name == metric.name):
        return True
return False
def generate_latest(registry):
    """Returns the metrics from the registry in the latest OpenMetrics text format as bytes."""
output = []
for metric in registry.collect():
try:
mname = metric.name
output.append('# HELP {} {}\n'.format(
escape_metric_name(mname), _escape(metric.documentation)))
output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n')
if metric.unit:
output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n')
for s in metric.samples:
if not _is_valid_legacy_metric_name(s.name):
labelstr = escape_metric_name(s.name)
if s.labels:
labelstr += ', '
else:
labelstr = ''
if s.labels:
items = sorted(s.labels.items())
labelstr += ','.join(
['{}="{}"'.format(
escape_label_name(k), _escape(v))
for k, v in items])
if labelstr:
labelstr = "{" + labelstr + "}"
if s.exemplar:
if not _is_valid_exemplar_metric(metric, s):
raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter")
labels = '{{{0}}}'.format(','.join(
['{}="{}"'.format(
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(s.exemplar.labels.items())]))
if s.exemplar.timestamp is not None:
exemplarstr = ' # {} {} {}'.format(
labels,
floatToGoString(s.exemplar.value),
s.exemplar.timestamp,
)
else:
exemplarstr = ' # {} {}'.format(
labels,
floatToGoString(s.exemplar.value),
)
else:
exemplarstr = ''
timestamp = ''
if s.timestamp is not None:
timestamp = f' {s.timestamp}'
if _is_valid_legacy_metric_name(s.name):
output.append('{}{} {}{}{}\n'.format(
s.name,
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
else:
output.append('{} {}{}{}\n'.format(
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
except Exception as exception:
exception.args = (exception.args or ('',)) + (metric,)
raise
output.append('# EOF\n')
return ''.join(output).encode('utf-8')
def escape_metric_name(s: str) -> str:
"""Escapes the metric name and puts it in quotes iff the name does not
conform to the legacy Prometheus character set.
"""
if _is_valid_legacy_metric_name(s):
return s
return '"{}"'.format(_escape(s))
def escape_label_name(s: str) -> str:
"""Escapes the label name and puts it in quotes iff the name does not
conform to the legacy Prometheus character set.
"""
if _is_valid_legacy_labelname(s):
return s
return '"{}"'.format(_escape(s))
def _escape(s: str) -> str:
"""Performs backslash escaping on backslash, newline, and double-quote characters."""
return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')
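# A sketch of the serialized form this module produces; the metric is
# illustrative. Note the _total suffix, the trailing # EOF, and (unless created
# samples are disabled) a _created sample with an approximate unix timestamp.
def _example_openmetrics_output():
    from prometheus_client import CollectorRegistry, Counter

    registry = CollectorRegistry()
    events = Counter('events', 'Events seen', registry=registry)
    events.inc(3)
    # # HELP events Events seen
    # # TYPE events counter
    # events_total 3.0
    # events_created 1.7e+09   (approximate)
    # # EOF
    print(generate_latest(registry).decode())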

View File

@@ -0,0 +1,653 @@
#!/usr/bin/env python
import io as StringIO
import math
import re
from ..metrics_core import Metric
from ..parser import (
_last_unquoted_char, _next_unquoted_char, _parse_value, _split_quoted,
_unquote_unescape, parse_labels,
)
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
from ..utils import floatToGoString
from ..validation import _is_valid_legacy_metric_name, _validate_metric_name
def text_string_to_metric_families(text):
"""Parse Openmetrics text format from a unicode string.
See text_fd_to_metric_families.
"""
yield from text_fd_to_metric_families(StringIO.StringIO(text))
_CANONICAL_NUMBERS = {float("inf")}
def _isUncanonicalNumber(s):
f = float(s)
if f not in _CANONICAL_NUMBERS:
return False # Only the canonical numbers are required to be canonical.
return s != floatToGoString(f)
ESCAPE_SEQUENCES = {
'\\\\': '\\',
'\\n': '\n',
'\\"': '"',
}
def _replace_escape_sequence(match):
return ESCAPE_SEQUENCES[match.group(0)]
ESCAPING_RE = re.compile(r'\\[\\n"]')
def _replace_escaping(s):
return ESCAPING_RE.sub(_replace_escape_sequence, s)
def _unescape_help(text):
result = []
slash = False
for char in text:
if slash:
if char == '\\':
result.append('\\')
elif char == '"':
result.append('"')
elif char == 'n':
result.append('\n')
else:
result.append('\\' + char)
slash = False
else:
if char == '\\':
slash = True
else:
result.append(char)
if slash:
result.append('\\')
return ''.join(result)
def _parse_timestamp(timestamp):
timestamp = ''.join(timestamp)
if not timestamp:
return None
if timestamp != timestamp.strip() or '_' in timestamp:
raise ValueError(f"Invalid timestamp: {timestamp!r}")
try:
# Simple int.
return Timestamp(int(timestamp), 0)
except ValueError:
try:
# aaaa.bbbb. Nanosecond resolution supported.
parts = timestamp.split('.', 1)
return Timestamp(int(parts[0]), int(parts[1][:9].ljust(9, "0")))
except ValueError:
# Float.
ts = float(timestamp)
if math.isnan(ts) or math.isinf(ts):
raise ValueError(f"Invalid timestamp: {timestamp!r}")
return ts
def _is_character_escaped(s, charpos):
num_bslashes = 0
while (charpos > num_bslashes
and s[charpos - 1 - num_bslashes] == '\\'):
num_bslashes += 1
return num_bslashes % 2 == 1
def _parse_sample(text):
separator = " # "
# Detect the labels in the text
label_start = _next_unquoted_char(text, '{')
if label_start == -1 or separator in text[:label_start]:
# We don't have labels, but there could be an exemplar.
name_end = _next_unquoted_char(text, ' ')
name = text[:name_end]
if not _is_valid_legacy_metric_name(name):
raise ValueError("invalid metric name:" + text)
# Parse the remaining text after the name
remaining_text = text[name_end + 1:]
value, timestamp, exemplar = _parse_remaining_text(remaining_text)
return Sample(name, {}, value, timestamp, exemplar)
name = text[:label_start]
label_end = _next_unquoted_char(text, '}')
labels = parse_labels(text[label_start + 1:label_end], True)
if not name:
# Name might be in the labels
if '__name__' not in labels:
raise ValueError
name = labels['__name__']
del labels['__name__']
elif '__name__' in labels:
raise ValueError("metric name specified more than once")
# Parsing labels succeeded, continue parsing the remaining text
remaining_text = text[label_end + 2:]
value, timestamp, exemplar = _parse_remaining_text(remaining_text)
return Sample(name, labels, value, timestamp, exemplar)
def _parse_remaining_text(text):
split_text = text.split(" ", 1)
val = _parse_value(split_text[0])
if len(split_text) == 1:
# We don't have timestamp or exemplar
return val, None, None
timestamp = []
exemplar_value = []
exemplar_timestamp = []
exemplar_labels = None
state = 'timestamp'
text = split_text[1]
it = iter(text)
in_quotes = False
for char in it:
if char == '"':
in_quotes = not in_quotes
if in_quotes:
continue
if state == 'timestamp':
if char == '#' and not timestamp:
state = 'exemplarspace'
elif char == ' ':
state = 'exemplarhash'
else:
timestamp.append(char)
elif state == 'exemplarhash':
if char == '#':
state = 'exemplarspace'
else:
raise ValueError("Invalid line: " + text)
elif state == 'exemplarspace':
if char == ' ':
state = 'exemplarstartoflabels'
else:
raise ValueError("Invalid line: " + text)
elif state == 'exemplarstartoflabels':
if char == '{':
label_start = _next_unquoted_char(text, '{')
label_end = _last_unquoted_char(text, '}')
exemplar_labels = parse_labels(text[label_start + 1:label_end], True)
state = 'exemplarparsedlabels'
else:
raise ValueError("Invalid line: " + text)
elif state == 'exemplarparsedlabels':
if char == '}':
state = 'exemplarvaluespace'
elif state == 'exemplarvaluespace':
if char == ' ':
state = 'exemplarvalue'
else:
raise ValueError("Invalid line: " + text)
elif state == 'exemplarvalue':
if char == ' ' and not exemplar_value:
raise ValueError("Invalid line: " + text)
elif char == ' ':
state = 'exemplartimestamp'
else:
exemplar_value.append(char)
elif state == 'exemplartimestamp':
exemplar_timestamp.append(char)
# Trailing space after value.
if state == 'timestamp' and not timestamp:
raise ValueError("Invalid line: " + text)
    # Trailing space after the exemplar value.
if state == 'exemplartimestamp' and not exemplar_timestamp:
raise ValueError("Invalid line: " + text)
# Incomplete exemplar.
if state in ['exemplarhash', 'exemplarspace', 'exemplarstartoflabels', 'exemplarparsedlabels']:
raise ValueError("Invalid line: " + text)
ts = _parse_timestamp(timestamp)
exemplar = None
if exemplar_labels is not None:
exemplar_length = sum(len(k) + len(v) for k, v in exemplar_labels.items())
if exemplar_length > 128:
raise ValueError("Exemplar labels are too long: " + text)
exemplar = Exemplar(
exemplar_labels,
_parse_value(exemplar_value),
_parse_timestamp(exemplar_timestamp),
)
return val, ts, exemplar
def _parse_nh_sample(text, suffixes):
"""Determines if the line has a native histogram sample, and parses it if so."""
labels_start = _next_unquoted_char(text, '{')
labels_end = -1
# Finding a native histogram sample requires careful parsing of
# possibly-quoted text, which can appear in metric names, label names, and
# values.
#
# First, we need to determine if there are metric labels. Find the space
# between the metric definition and the rest of the line. Look for unquoted
# space or {.
    has_metric_labels = False
    i = _next_unquoted_char(text, ' {')
if i == -1:
return
# If the first unquoted char was a {, then that is the metric labels (which
# could contain a UTF-8 metric name).
if text[i] == '{':
has_metric_labels = True
# Consume the labels -- jump ahead to the close bracket.
labels_end = i = _next_unquoted_char(text, '}', i)
if labels_end == -1:
raise ValueError
# If there is no subsequent unquoted {, then it's definitely not a nh.
nh_value_start = _next_unquoted_char(text, '{', i + 1)
if nh_value_start == -1:
return
# Edge case: if there is an unquoted # between the metric definition and the {,
# then this is actually an exemplar
exemplar = _next_unquoted_char(text, '#', i + 1)
if exemplar != -1 and exemplar < nh_value_start:
return
nh_value_end = _next_unquoted_char(text, '}', nh_value_start)
if nh_value_end == -1:
raise ValueError
if has_metric_labels:
labelstext = text[labels_start + 1:labels_end]
labels = parse_labels(labelstext, True)
name_end = labels_start
name = text[:name_end]
if name.endswith(suffixes):
raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
if not name:
# Name might be in the labels
if '__name__' not in labels:
raise ValueError
name = labels['__name__']
del labels['__name__']
# Edge case: the only "label" is the name definition.
if not labels:
labels = None
nh_value = text[nh_value_start:]
nat_hist_value = _parse_nh_struct(nh_value)
return Sample(name, labels, None, None, None, nat_hist_value)
    else:
        # No metric labels; check whether the bare sample is a native histogram.
        nh_value = text[nh_value_start:]
name_end = nh_value_start - 1
name = text[:name_end]
if name.endswith(suffixes):
raise ValueError("the sample name of a native histogram should have no suffixes", name)
# Not possible for UTF-8 name here, that would have been caught as having a labelset.
nat_hist_value = _parse_nh_struct(nh_value)
return Sample(name, None, None, None, None, nat_hist_value)
def _parse_nh_struct(text):
pattern = r'(\w+):\s*([^,}]+)'
re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+(,\d+:\d+)*)\]')
re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')
items = dict(re.findall(pattern, text))
span_matches = re_spans.findall(text)
deltas = dict(re_deltas.findall(text))
count_value = int(items['count'])
sum_value = int(items['sum'])
schema = int(items['schema'])
zero_threshold = float(items['zero_threshold'])
zero_count = int(items['zero_count'])
pos_spans = _compose_spans(span_matches, 'positive_spans')
neg_spans = _compose_spans(span_matches, 'negative_spans')
pos_deltas = _compose_deltas(deltas, 'positive_deltas')
neg_deltas = _compose_deltas(deltas, 'negative_deltas')
return NativeHistogram(
count_value=count_value,
sum_value=sum_value,
schema=schema,
zero_threshold=zero_threshold,
zero_count=zero_count,
pos_spans=pos_spans,
neg_spans=neg_spans,
pos_deltas=pos_deltas,
neg_deltas=neg_deltas
)
def _compose_spans(span_matches, spans_name):
"""Takes a list of span matches (expected to be a list of tuples) and a string
(the expected span list name) and processes the list so that the values extracted
from the span matches can be used to compose a tuple of BucketSpan objects"""
spans = {}
for match in span_matches:
# Extract the key from the match (first element of the tuple).
key = match[0]
# Extract the value from the match (second element of the tuple).
# Split the value string by commas to get individual pairs,
# split each pair by ':' to get start and end, and convert them to integers.
value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')]
# Store the processed value in the spans dictionary with the key.
spans[key] = value
if spans_name not in spans:
return None
out_spans = []
# Iterate over each start and end tuple in the list of tuples for the specified spans_name.
for start, end in spans[spans_name]:
# Compose a BucketSpan object with the start and end values
# and append it to the out_spans list.
out_spans.append(BucketSpan(start, end))
# Convert to tuple
out_spans_tuple = tuple(out_spans)
return out_spans_tuple
def _compose_deltas(deltas, deltas_name):
"""Takes a list of deltas matches (a dictionary) and a string (the expected delta list name),
and processes its elements to compose a tuple of integers representing the deltas"""
if deltas_name not in deltas:
return None
out_deltas = deltas.get(deltas_name)
if out_deltas is not None and out_deltas.strip():
elems = out_deltas.split(',')
# Convert each element in the list elems to an integer
# after stripping whitespace and create a tuple from these integers.
out_deltas_tuple = tuple(int(x.strip()) for x in elems)
return out_deltas_tuple
def _group_for_sample(sample, name, typ):
if typ == 'info':
# We can't distinguish between groups for info metrics.
return {}
if typ == 'summary' and sample.name == name:
d = sample.labels.copy()
del d['quantile']
return d
if typ == 'stateset':
d = sample.labels.copy()
del d[name]
return d
if typ in ['histogram', 'gaugehistogram'] and sample.name == name + '_bucket':
d = sample.labels.copy()
del d['le']
return d
return sample.labels
def _check_histogram(samples, name):
group = None
timestamp = None
def do_checks():
if bucket != float('+Inf'):
raise ValueError("+Inf bucket missing: " + name)
if count is not None and value != count:
raise ValueError("Count does not match +Inf value: " + name)
if has_sum and count is None:
raise ValueError("_count must be present if _sum is present: " + name)
if has_gsum and count is None:
raise ValueError("_gcount must be present if _gsum is present: " + name)
if not (has_sum or has_gsum) and count is not None:
raise ValueError("_sum/_gsum must be present if _count is present: " + name)
if has_negative_buckets and has_sum:
raise ValueError("Cannot have _sum with negative buckets: " + name)
if not has_negative_buckets and has_negative_gsum:
raise ValueError("Cannot have negative _gsum with non-negative buckets: " + name)
for s in samples:
suffix = s.name[len(name):]
g = _group_for_sample(s, name, 'histogram')
if len(suffix) == 0:
continue
if g != group or s.timestamp != timestamp:
if group is not None:
do_checks()
count = None
bucket = None
has_negative_buckets = False
has_sum = False
has_gsum = False
has_negative_gsum = False
value = 0
group = g
timestamp = s.timestamp
if suffix == '_bucket':
b = float(s.labels['le'])
if b < 0:
has_negative_buckets = True
if bucket is not None and b <= bucket:
raise ValueError("Buckets out of order: " + name)
if s.value < value:
raise ValueError("Bucket values out of order: " + name)
bucket = b
value = s.value
elif suffix in ['_count', '_gcount']:
count = s.value
elif suffix in ['_sum']:
has_sum = True
elif suffix in ['_gsum']:
has_gsum = True
if s.value < 0:
has_negative_gsum = True
if group is not None:
do_checks()
def text_fd_to_metric_families(fd):
"""Parse Prometheus text format from a file descriptor.
This is a laxer parser than the main Go parser,
so successful parsing does not imply that the parsed
text meets the specification.
    Yields Metric objects.
"""
name = None
allowed_names = []
eof = False
seen_names = set()
type_suffixes = {
'counter': ['_total', '_created'],
'summary': ['', '_count', '_sum', '_created'],
'histogram': ['_count', '_sum', '_bucket', '_created'],
'gaugehistogram': ['_gcount', '_gsum', '_bucket'],
'info': ['_info'],
}
def build_metric(name, documentation, typ, unit, samples):
if typ is None:
typ = 'unknown'
for suffix in set(type_suffixes.get(typ, []) + [""]):
if name + suffix in seen_names:
raise ValueError("Clashing name: " + name + suffix)
seen_names.add(name + suffix)
if documentation is None:
documentation = ''
if unit is None:
unit = ''
if unit and not name.endswith("_" + unit):
raise ValueError("Unit does not match metric name: " + name)
if unit and typ in ['info', 'stateset']:
raise ValueError("Units not allowed for this metric type: " + name)
if typ in ['histogram', 'gaugehistogram']:
_check_histogram(samples, name)
_validate_metric_name(name)
metric = Metric(name, documentation, typ, unit)
# TODO: check labelvalues are valid utf8
metric.samples = samples
return metric
is_nh = False
typ = None
for line in fd:
if line[-1] == '\n':
line = line[:-1]
if eof:
raise ValueError("Received line after # EOF: " + line)
if not line:
raise ValueError("Received blank line")
if line == '# EOF':
eof = True
elif line.startswith('#'):
parts = _split_quoted(line, ' ', 3)
if len(parts) < 4:
raise ValueError("Invalid line: " + line)
candidate_name, quoted = _unquote_unescape(parts[2])
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
if candidate_name == name and samples:
raise ValueError("Received metadata after samples: " + line)
if candidate_name != name:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# New metric
name = candidate_name
unit = None
typ = None
documentation = None
group = None
seen_groups = set()
group_timestamp = None
group_timestamp_samples = set()
samples = []
allowed_names = [candidate_name]
if parts[1] == 'HELP':
if documentation is not None:
raise ValueError("More than one HELP for metric: " + line)
documentation = _unescape_help(parts[3])
elif parts[1] == 'TYPE':
if typ is not None:
raise ValueError("More than one TYPE for metric: " + line)
typ = parts[3]
if typ == 'untyped':
raise ValueError("Invalid TYPE for metric: " + line)
allowed_names = [name + n for n in type_suffixes.get(typ, [''])]
elif parts[1] == 'UNIT':
if unit is not None:
raise ValueError("More than one UNIT for metric: " + line)
unit = parts[3]
else:
raise ValueError("Invalid line: " + line)
else:
if typ == 'histogram':
                # Assume a native histogram first, to account for naming exceptions and sanitizing differences.
is_nh = True
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
# It's not a native histogram
if sample is None:
is_nh = False
sample = _parse_sample(line)
else:
is_nh = False
sample = _parse_sample(line)
if sample.name not in allowed_names and not is_nh:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# Start an unknown metric.
candidate_name, quoted = _unquote_unescape(sample.name)
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
name = candidate_name
documentation = None
unit = None
typ = 'unknown'
samples = []
group = None
group_timestamp = None
group_timestamp_samples = set()
seen_groups = set()
allowed_names = [sample.name]
if typ == 'stateset' and name not in sample.labels:
raise ValueError("Stateset missing label: " + line)
if (name + '_bucket' == sample.name
and (sample.labels.get('le', "NaN") == "NaN"
or _isUncanonicalNumber(sample.labels['le']))):
raise ValueError("Invalid le label: " + line)
if (name + '_bucket' == sample.name
and (not isinstance(sample.value, int) and not sample.value.is_integer())):
raise ValueError("Bucket value must be an integer: " + line)
if ((name + '_count' == sample.name or name + '_gcount' == sample.name)
and (not isinstance(sample.value, int) and not sample.value.is_integer())):
raise ValueError("Count value must be an integer: " + line)
if (typ == 'summary' and name == sample.name
and (not (0 <= float(sample.labels.get('quantile', -1)) <= 1)
or _isUncanonicalNumber(sample.labels['quantile']))):
raise ValueError("Invalid quantile label: " + line)
if not is_nh:
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
else:
group_timestamp_samples = set()
series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)
group = g
group_timestamp = sample.timestamp
seen_groups.add(g)
else:
samples.append(sample)
if typ == 'stateset' and sample.value not in [0, 1]:
raise ValueError("Stateset samples can only have values zero and one: " + line)
if typ == 'info' and sample.value != 1:
raise ValueError("Info samples can only have value one: " + line)
if typ == 'summary' and name == sample.name and sample.value < 0:
raise ValueError("Quantile values cannot be negative: " + line)
if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount', '_gsum'] and math.isnan(
sample.value):
raise ValueError("Counter-like samples cannot be NaN: " + line)
if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount'] and sample.value < 0:
raise ValueError("Counter-like samples cannot be negative: " + line)
if sample.exemplar and not (
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
or (typ in ['counter'] and sample.name.endswith('_total'))):
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
if not eof:
raise ValueError("Missing # EOF at end")

View File

@@ -0,0 +1,367 @@
import io as StringIO
import re
import string
from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple
from .metrics_core import Metric
from .samples import Sample
from .validation import (
_is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
)
def text_string_to_metric_families(text: str) -> Iterable[Metric]:
"""Parse Prometheus text format from a unicode string.
See text_fd_to_metric_families.
"""
yield from text_fd_to_metric_families(StringIO.StringIO(text))
ESCAPE_SEQUENCES = {
'\\\\': '\\',
'\\n': '\n',
'\\"': '"',
}
def replace_escape_sequence(match: Match[str]) -> str:
return ESCAPE_SEQUENCES[match.group(0)]
HELP_ESCAPING_RE = re.compile(r'\\[\\n]')
ESCAPING_RE = re.compile(r'\\[\\n"]')
def _replace_help_escaping(s: str) -> str:
return HELP_ESCAPING_RE.sub(replace_escape_sequence, s)
def _replace_escaping(s: str) -> str:
return ESCAPING_RE.sub(replace_escape_sequence, s)
def _is_character_escaped(s: str, charpos: int) -> bool:
num_bslashes = 0
while (charpos > num_bslashes
and s[charpos - 1 - num_bslashes] == '\\'):
num_bslashes += 1
return num_bslashes % 2 == 1
def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
labels: Dict[str, str] = {}
# Copy original labels
sub_labels = labels_string.strip()
if openmetrics and sub_labels and sub_labels[0] == ',':
raise ValueError("leading comma: " + labels_string)
try:
# Process one label at a time
while sub_labels:
# The label name is before the equal, or if there's no equal, that's the
# metric name.
term, sub_labels = _next_term(sub_labels, openmetrics)
if not term:
if openmetrics:
raise ValueError("empty term in line: " + labels_string)
continue
quoted_name = False
operator_pos = _next_unquoted_char(term, '=')
if operator_pos == -1:
quoted_name = True
label_name = "__name__"
else:
value_start = _next_unquoted_char(term, '=')
label_name, quoted_name = _unquote_unescape(term[:value_start])
term = term[value_start + 1:]
if not quoted_name and not _is_valid_legacy_metric_name(label_name):
raise ValueError("unquoted UTF-8 metric name")
# Check for missing quotes
term = term.strip()
if not term or term[0] != '"':
raise ValueError
# The first quote is guaranteed to be after the equal.
# Find the last unescaped quote.
i = 1
while i < len(term):
i = term.index('"', i)
if not _is_character_escaped(term[:i], i):
break
i += 1
# The label value is between the first and last quote
quote_end = i + 1
if quote_end != len(term):
raise ValueError("unexpected text after quote: " + labels_string)
label_value, _ = _unquote_unescape(term[:quote_end])
if label_name == '__name__':
_validate_metric_name(label_name)
else:
_validate_labelname(label_name)
if label_name in labels:
raise ValueError("invalid line, duplicate label name: " + labels_string)
labels[label_name] = label_value
return labels
except ValueError:
raise ValueError("Invalid labels: " + labels_string)
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
"""Extract the next comma-separated label term from the text.
Returns the stripped term and the stripped remainder of the string,
including the comma.
Raises ValueError if the term is empty and we're in openmetrics mode.
"""
# There may be a leading comma, which is fine here.
if text[0] == ',':
text = text[1:]
if not text:
return "", ""
if text[0] == ',':
raise ValueError("multiple commas")
splitpos = _next_unquoted_char(text, ',}')
if splitpos == -1:
splitpos = len(text)
term = text[:splitpos]
if not term and openmetrics:
raise ValueError("empty term:", term)
sublabels = text[splitpos:]
return term.strip(), sublabels.strip()
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
"""Return position of next unquoted character in tuple, or -1 if not found.
It is always assumed that the first character being checked is not already
inside quotes.
"""
i = startidx
in_quotes = False
if chs is None:
chs = string.whitespace
while i < len(text):
if text[i] == '"' and not _is_character_escaped(text, i):
in_quotes = not in_quotes
if not in_quotes:
if text[i] in chs:
return i
i += 1
return -1
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
"""Return position of last unquoted character in list, or -1 if not found."""
i = len(text) - 1
in_quotes = False
if chs is None:
chs = string.whitespace
while i > 0:
if text[i] == '"' and not _is_character_escaped(text, i):
in_quotes = not in_quotes
if not in_quotes:
if text[i] in chs:
return i
i -= 1
return -1
def _split_quoted(text, separator, maxsplit=0):
"""Splits on split_ch similarly to strings.split, skipping separators if
they are inside quotes.
"""
tokens = ['']
x = 0
while x < len(text):
split_pos = _next_unquoted_char(text, separator, x)
if split_pos == -1:
tokens[-1] = text[x:]
x = len(text)
continue
if maxsplit > 0 and len(tokens) > maxsplit:
tokens[-1] = text[x:]
break
tokens[-1] = text[x:split_pos]
x = split_pos + 1
tokens.append('')
return tokens
def _unquote_unescape(text):
"""Returns the string, and true if it was quoted."""
if not text:
return text, False
quoted = False
text = text.strip()
if text[0] == '"':
if len(text) == 1 or text[-1] != '"':
raise ValueError("missing close quote")
text = text[1:-1]
quoted = True
if "\\" in text:
text = _replace_escaping(text)
return text, quoted
# The first whitespace-separated token is the value; the last, if present,
# is the timestamp (milliseconds in the text format).
def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
s = s.lstrip()
separator = " "
if separator not in s:
separator = "\t"
values = [value.strip() for value in s.split(separator) if value.strip()]
if not values:
return float(s), None
value = _parse_value(values[0])
timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
return value, timestamp
def _parse_value(value):
if value != value.strip() or '_' in value:
raise ValueError(f"Invalid value: {value!r}")
try:
return int(value)
except ValueError:
return float(value)
def _parse_sample(text):
separator = " # "
# Detect the labels in the text
label_start = _next_unquoted_char(text, '{')
if label_start == -1 or separator in text[:label_start]:
# We don't have labels, but there could be an exemplar.
name_end = _next_unquoted_char(text, ' \t')
name = text[:name_end].strip()
if not _is_valid_legacy_metric_name(name):
raise ValueError("invalid metric name:" + text)
# Parse the remaining text after the name
remaining_text = text[name_end + 1:]
value, timestamp = _parse_value_and_timestamp(remaining_text)
return Sample(name, {}, value, timestamp)
name = text[:label_start].strip()
label_end = _next_unquoted_char(text, '}')
labels = parse_labels(text[label_start + 1:label_end], False)
if not name:
# Name might be in the labels
if '__name__' not in labels:
            raise ValueError("metric name not specified: " + text)
name = labels['__name__']
del labels['__name__']
elif '__name__' in labels:
raise ValueError("metric name specified more than once")
# Parsing labels succeeded, continue parsing the remaining text
remaining_text = text[label_end + 1:]
value, timestamp = _parse_value_and_timestamp(remaining_text)
return Sample(name, labels, value, timestamp)
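A hedged example of the resulting Sample; the trailing timestamp is milliseconds in the text format, hence the division by 1000 above:

s = _parse_sample('http_requests_total{method="get"} 100 1700000000000')
print(s.name, s.labels)  # http_requests_total {'method': 'get'}
print(s.value)           # 100
print(s.timestamp)       # 1700000000.0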
def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
"""Parse Prometheus text format from a file descriptor.
This is a laxer parser than the main Go parser,
so successful parsing does not imply that the parsed
text meets the specification.
    Yields Metric objects.
"""
name = ''
documentation = ''
typ = 'untyped'
samples: List[Sample] = []
allowed_names = []
def build_metric(name: str, documentation: str, typ: str, samples: List[Sample]) -> Metric:
# Munge counters into OpenMetrics representation
# used internally.
if typ == 'counter':
if name.endswith('_total'):
name = name[:-6]
else:
new_samples = []
for s in samples:
new_samples.append(Sample(s[0] + '_total', *s[1:]))
samples = new_samples
metric = Metric(name, documentation, typ)
metric.samples = samples
return metric
for line in fd:
line = line.strip()
if line.startswith('#'):
parts = _split_quoted(line, None, 3)
if len(parts) < 2:
continue
candidate_name, quoted = '', False
if len(parts) > 2:
# Ignore comment tokens
if parts[1] != 'TYPE' and parts[1] != 'HELP':
continue
candidate_name, quoted = _unquote_unescape(parts[2])
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
                    raise ValueError("invalid metric name in comment: " + line)
if parts[1] == 'HELP':
if candidate_name != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = candidate_name
typ = 'untyped'
samples = []
allowed_names = [candidate_name]
if len(parts) == 4:
documentation = _replace_help_escaping(parts[3])
else:
documentation = ''
elif parts[1] == 'TYPE':
if len(parts) < 4:
                    raise ValueError("invalid TYPE comment: " + line)
if candidate_name != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = candidate_name
documentation = ''
samples = []
typ = parts[3]
allowed_names = {
'counter': [''],
'gauge': [''],
'summary': ['_count', '_sum', ''],
'histogram': ['_count', '_sum', '_bucket'],
}.get(typ, [''])
allowed_names = [name + n for n in allowed_names]
elif line == '':
# Ignore blank lines
pass
else:
sample = _parse_sample(line)
if sample.name not in allowed_names:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric, yield immediately as untyped singleton
name = ''
documentation = ''
typ = 'untyped'
samples = []
allowed_names = []
yield build_metric(sample[0], documentation, typ, [sample])
else:
samples.append(sample)
if name != '':
yield build_metric(name, documentation, typ, samples)
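End to end, the parser is usually driven from a string (a minimal sketch, assuming the module is importable as prometheus_client.parser):

from prometheus_client.parser import text_string_to_metric_families

text = """# HELP http_requests_total Total HTTP requests.
# TYPE http_requests_total counter
http_requests_total{method="get"} 2
http_requests_total{method="post"} 1
"""
for family in text_string_to_metric_families(text):
    # The counter family is named http_requests; samples keep _total.
    for sample in family.samples:
        print(sample.name, sample.labels, sample.value)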

View File

@@ -0,0 +1,59 @@
import platform as pf
from typing import Any, Iterable, Optional
from .metrics_core import GaugeMetricFamily, Metric
from .registry import Collector, CollectorRegistry, REGISTRY
class PlatformCollector(Collector):
"""Collector for python platform information"""
def __init__(self,
registry: Optional[CollectorRegistry] = REGISTRY,
platform: Optional[Any] = None,
):
self._platform = pf if platform is None else platform
info = self._info()
system = self._platform.system()
if system == "Java":
info.update(self._java())
self._metrics = [
self._add_metric("python_info", "Python platform information", info)
]
if registry:
registry.register(self)
def collect(self) -> Iterable[Metric]:
return self._metrics
@staticmethod
def _add_metric(name, documentation, data):
labels = data.keys()
values = [data[k] for k in labels]
g = GaugeMetricFamily(name, documentation, labels=labels)
g.add_metric(values, 1)
return g
def _info(self):
major, minor, patchlevel = self._platform.python_version_tuple()
return {
"version": self._platform.python_version(),
"implementation": self._platform.python_implementation(),
"major": major,
"minor": minor,
"patchlevel": patchlevel
}
def _java(self):
java_version, _, vminfo, osinfo = self._platform.java_ver()
vm_name, vm_release, vm_vendor = vminfo
return {
"jvm_version": java_version,
"jvm_release": vm_release,
"jvm_vendor": vm_vendor,
"jvm_name": vm_name
}
PLATFORM_COLLECTOR = PlatformCollector()
"""PlatformCollector in default Registry REGISTRY"""

View File

@@ -0,0 +1,101 @@
import os
from typing import Callable, Iterable, Optional, Union
from .metrics_core import CounterMetricFamily, GaugeMetricFamily, Metric
from .registry import Collector, CollectorRegistry, REGISTRY
try:
import resource
_PAGESIZE = resource.getpagesize()
except ImportError:
# Not Unix
_PAGESIZE = 4096
class ProcessCollector(Collector):
"""Collector for Standard Exports such as cpu and memory."""
def __init__(self,
namespace: str = '',
pid: Callable[[], Union[int, str]] = lambda: 'self',
proc: str = '/proc',
registry: Optional[CollectorRegistry] = REGISTRY):
self._namespace = namespace
self._pid = pid
self._proc = proc
if namespace:
self._prefix = namespace + '_process_'
else:
self._prefix = 'process_'
self._ticks = 100.0
try:
self._ticks = os.sysconf('SC_CLK_TCK')
except (ValueError, TypeError, AttributeError, OSError):
pass
self._pagesize = _PAGESIZE
# This is used to test if we can access /proc.
self._btime = 0
try:
self._btime = self._boot_time()
except OSError:
pass
if registry:
registry.register(self)
def _boot_time(self):
with open(os.path.join(self._proc, 'stat'), 'rb') as stat:
for line in stat:
if line.startswith(b'btime '):
return float(line.split()[1])
def collect(self) -> Iterable[Metric]:
if not self._btime:
return []
pid = os.path.join(self._proc, str(self._pid()).strip())
result = []
try:
with open(os.path.join(pid, 'stat'), 'rb') as stat:
parts = (stat.read().split(b')')[-1].split())
vmem = GaugeMetricFamily(self._prefix + 'virtual_memory_bytes',
'Virtual memory size in bytes.', value=float(parts[20]))
rss = GaugeMetricFamily(self._prefix + 'resident_memory_bytes', 'Resident memory size in bytes.',
value=float(parts[21]) * self._pagesize)
start_time_secs = float(parts[19]) / self._ticks
start_time = GaugeMetricFamily(self._prefix + 'start_time_seconds',
'Start time of the process since unix epoch in seconds.',
value=start_time_secs + self._btime)
utime = float(parts[11]) / self._ticks
stime = float(parts[12]) / self._ticks
cpu = CounterMetricFamily(self._prefix + 'cpu_seconds_total',
'Total user and system CPU time spent in seconds.',
value=utime + stime)
result.extend([vmem, rss, start_time, cpu])
except OSError:
pass
try:
with open(os.path.join(pid, 'limits'), 'rb') as limits:
for line in limits:
if line.startswith(b'Max open file'):
max_fds = GaugeMetricFamily(self._prefix + 'max_fds',
'Maximum number of open file descriptors.',
value=float(line.split()[3]))
break
open_fds = GaugeMetricFamily(self._prefix + 'open_fds',
'Number of open file descriptors.',
len(os.listdir(os.path.join(pid, 'fd'))))
result.extend([open_fds, max_fds])
except OSError:
pass
return result
PROCESS_COLLECTOR = ProcessCollector()
"""Default ProcessCollector in default Registry REGISTRY."""

View File

@@ -0,0 +1,168 @@
from abc import ABC, abstractmethod
import copy
from threading import Lock
from typing import Dict, Iterable, List, Optional
from .metrics_core import Metric
# Ideally this would be a Protocol, but Protocols are only available in Python >= 3.8.
class Collector(ABC):
@abstractmethod
def collect(self) -> Iterable[Metric]:
pass
class _EmptyCollector(Collector):
def collect(self) -> Iterable[Metric]:
return []
class CollectorRegistry(Collector):
"""Metric collector registry.
Collectors must have a no-argument method 'collect' that returns a list of
Metric objects. The returned metrics should be consistent with the Prometheus
exposition formats.
"""
def __init__(self, auto_describe: bool = False, target_info: Optional[Dict[str, str]] = None):
self._collector_to_names: Dict[Collector, List[str]] = {}
self._names_to_collectors: Dict[str, Collector] = {}
self._auto_describe = auto_describe
self._lock = Lock()
self._target_info: Optional[Dict[str, str]] = {}
self.set_target_info(target_info)
def register(self, collector: Collector) -> None:
"""Add a collector to the registry."""
with self._lock:
names = self._get_names(collector)
duplicates = set(self._names_to_collectors).intersection(names)
if duplicates:
raise ValueError(
'Duplicated timeseries in CollectorRegistry: {}'.format(
duplicates))
for name in names:
self._names_to_collectors[name] = collector
self._collector_to_names[collector] = names
def unregister(self, collector: Collector) -> None:
"""Remove a collector from the registry."""
with self._lock:
for name in self._collector_to_names[collector]:
del self._names_to_collectors[name]
del self._collector_to_names[collector]
def _get_names(self, collector):
"""Get names of timeseries the collector produces and clashes with."""
desc_func = None
# If there's a describe function, use it.
try:
desc_func = collector.describe
except AttributeError:
pass
# Otherwise, if auto describe is enabled use the collect function.
if not desc_func and self._auto_describe:
desc_func = collector.collect
if not desc_func:
return []
result = []
type_suffixes = {
'counter': ['_total', '_created'],
'summary': ['_sum', '_count', '_created'],
'histogram': ['_bucket', '_sum', '_count', '_created'],
'gaugehistogram': ['_bucket', '_gsum', '_gcount'],
'info': ['_info'],
}
for metric in desc_func():
result.append(metric.name)
for suffix in type_suffixes.get(metric.type, []):
result.append(metric.name + suffix)
return result
def collect(self) -> Iterable[Metric]:
"""Yields metrics from the collectors in the registry."""
collectors = None
ti = None
with self._lock:
collectors = copy.copy(self._collector_to_names)
if self._target_info:
ti = self._target_info_metric()
if ti:
yield ti
for collector in collectors:
yield from collector.collect()
def restricted_registry(self, names: Iterable[str]) -> "RestrictedRegistry":
"""Returns object that only collects some metrics.
Returns an object which upon collect() will return
only samples with the given names.
Intended usage is:
generate_latest(REGISTRY.restricted_registry(['a_timeseries']))
Experimental."""
names = set(names)
return RestrictedRegistry(names, self)
def set_target_info(self, labels: Optional[Dict[str, str]]) -> None:
with self._lock:
if labels:
if not self._target_info and 'target_info' in self._names_to_collectors:
raise ValueError('CollectorRegistry already contains a target_info metric')
self._names_to_collectors['target_info'] = _EmptyCollector()
elif self._target_info:
self._names_to_collectors.pop('target_info', None)
self._target_info = labels
def get_target_info(self) -> Optional[Dict[str, str]]:
with self._lock:
return self._target_info
def _target_info_metric(self):
m = Metric('target', 'Target metadata', 'info')
m.add_sample('target_info', self._target_info, 1)
return m
def get_sample_value(self, name: str, labels: Optional[Dict[str, str]] = None) -> Optional[float]:
"""Returns the sample value, or None if not found.
This is inefficient, and intended only for use in unittests.
"""
if labels is None:
labels = {}
for metric in self.collect():
for s in metric.samples:
if s.name == name and s.labels == labels:
return s.value
return None
class RestrictedRegistry:
def __init__(self, names: Iterable[str], registry: CollectorRegistry):
self._name_set = set(names)
self._registry = registry
def collect(self) -> Iterable[Metric]:
collectors = set()
target_info_metric = None
with self._registry._lock:
if 'target_info' in self._name_set and self._registry._target_info:
target_info_metric = self._registry._target_info_metric()
for name in self._name_set:
if name != 'target_info' and name in self._registry._names_to_collectors:
collectors.add(self._registry._names_to_collectors[name])
if target_info_metric:
yield target_info_metric
for collector in collectors:
for metric in collector.collect():
m = metric._restricted_metric(self._name_set)
if m:
yield m
REGISTRY = CollectorRegistry(auto_describe=True)
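A minimal custom collector against the Collector ABC above; QueueDepthCollector is a hypothetical name used only for illustration, and the import assumes the package layout shown in this commit:

from prometheus_client.metrics_core import GaugeMetricFamily

class QueueDepthCollector(Collector):
    def collect(self):
        # Hypothetical gauge; a real collector would read live state here.
        yield GaugeMetricFamily('queue_depth', 'Items waiting in the queue.', value=42)

registry = CollectorRegistry()
registry.register(QueueDepthCollector())
print(registry.get_sample_value('queue_depth'))  # 42.0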

View File

@@ -0,0 +1,73 @@
from typing import Dict, NamedTuple, Optional, Sequence, Union
class Timestamp:
"""A nanosecond-resolution timestamp."""
def __init__(self, sec: float, nsec: float) -> None:
if nsec < 0 or nsec >= 1e9:
raise ValueError(f"Invalid value for nanoseconds in Timestamp: {nsec}")
if sec < 0:
nsec = -nsec
self.sec: int = int(sec)
self.nsec: int = int(nsec)
def __str__(self) -> str:
return f"{self.sec}.{self.nsec:09d}"
def __repr__(self) -> str:
return f"Timestamp({self.sec}, {self.nsec})"
def __float__(self) -> float:
return float(self.sec) + float(self.nsec) / 1e9
def __eq__(self, other: object) -> bool:
return isinstance(other, Timestamp) and self.sec == other.sec and self.nsec == other.nsec
def __ne__(self, other: object) -> bool:
return not self == other
def __gt__(self, other: "Timestamp") -> bool:
return self.nsec > other.nsec if self.sec == other.sec else self.sec > other.sec
def __lt__(self, other: "Timestamp") -> bool:
return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec
# BucketSpan is experimental and subject to change at any time.
class BucketSpan(NamedTuple):
offset: int
length: int
# NativeHistogram is experimental and subject to change at any time.
class NativeHistogram(NamedTuple):
count_value: float
sum_value: float
schema: int
zero_threshold: float
zero_count: float
pos_spans: Optional[Sequence[BucketSpan]] = None
neg_spans: Optional[Sequence[BucketSpan]] = None
pos_deltas: Optional[Sequence[int]] = None
neg_deltas: Optional[Sequence[int]] = None
# Timestamp and exemplar are optional.
# Value can be an int or a float.
# Timestamp can be a float containing a unixtime in seconds,
# a Timestamp object, or None.
# Exemplar can be an Exemplar object, or None.
class Exemplar(NamedTuple):
labels: Dict[str, str]
value: float
timestamp: Optional[Union[float, Timestamp]] = None
class Sample(NamedTuple):
name: str
labels: Dict[str, str]
value: float
timestamp: Optional[Union[float, Timestamp]] = None
exemplar: Optional[Exemplar] = None
native_histogram: Optional[NativeHistogram] = None
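Constructing these types by hand, e.g. in tests (a sketch):

ts = Timestamp(1700000000, 500_000_000)
print(str(ts))    # 1700000000.500000000
print(float(ts))  # 1700000000.5
s = Sample('http_requests_total', {'method': 'get'}, 100.0, ts,
           Exemplar({'trace_id': 'abc123'}, 1.0))
print(s.exemplar.labels)  # {'trace_id': 'abc123'}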

View File

@@ -0,0 +1,3 @@
from ._exposition import MetricsResource
__all__ = ['MetricsResource']

View File

@@ -0,0 +1,8 @@
from twisted.internet import reactor
from twisted.web.wsgi import WSGIResource
from .. import exposition, REGISTRY
def MetricsResource(registry=REGISTRY):
    """Return a Twisted WSGIResource that serves metrics from the registry."""
    return WSGIResource(
        reactor, reactor.getThreadPool(), exposition.make_wsgi_app(registry)
    )
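Typical wiring under Twisted (a sketch, assuming the package is importable as prometheus_client):

from twisted.internet import reactor
from twisted.web.resource import Resource
from twisted.web.server import Site

from prometheus_client.twisted import MetricsResource

root = Resource()
root.putChild(b'metrics', MetricsResource())
reactor.listenTCP(8000, Site(root))
reactor.run()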

View File

@@ -0,0 +1,24 @@
import math
INF = float("inf")
MINUS_INF = float("-inf")
NaN = float("NaN")
def floatToGoString(d):
d = float(d)
if d == INF:
return '+Inf'
elif d == MINUS_INF:
return '-Inf'
elif math.isnan(d):
return 'NaN'
else:
s = repr(d)
dot = s.find('.')
# Go switches to exponents sooner than Python.
# We only need to care about positive values for le/quantile.
if d > 0 and dot > 6:
mantissa = f'{s[0]}.{s[1:dot]}{s[dot + 1:]}'.rstrip('0.')
return f'{mantissa}e+0{dot - 1}'
return s
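Spot checks, which follow from the branches above:

print(floatToGoString(float('inf')))  # +Inf
print(floatToGoString(0.5))           # 0.5
print(floatToGoString(1e7))           # 1e+07
print(floatToGoString(float('nan')))  # NaN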

View File

@@ -0,0 +1,123 @@
import os
import re
METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$')
METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$')
def _init_legacy_validation() -> bool:
"""Retrieve name validation setting from environment."""
return os.environ.get("PROMETHEUS_LEGACY_NAME_VALIDATION", 'False').lower() in ('true', '1', 't')
_legacy_validation = _init_legacy_validation()
def get_legacy_validation() -> bool:
"""Return the current status of the legacy validation setting."""
global _legacy_validation
return _legacy_validation
def disable_legacy_validation():
"""Disable legacy name validation, instead allowing all UTF8 characters."""
global _legacy_validation
_legacy_validation = False
def enable_legacy_validation():
"""Enable legacy name validation instead of allowing all UTF8 characters."""
global _legacy_validation
_legacy_validation = True
def _validate_metric_name(name: str) -> None:
"""Raises ValueError if the provided name is not a valid metric name.
This check uses the global legacy validation setting to determine the validation scheme.
"""
if not name:
raise ValueError("metric name cannot be empty")
global _legacy_validation
if _legacy_validation:
if not METRIC_NAME_RE.match(name):
raise ValueError("invalid metric name " + name)
try:
name.encode('utf-8')
    except UnicodeError:  # encoding a str raises UnicodeEncodeError, not UnicodeDecodeError
raise ValueError("invalid metric name " + name)
def _is_valid_legacy_metric_name(name: str) -> bool:
"""Returns true if the provided metric name conforms to the legacy validation scheme."""
return METRIC_NAME_RE.match(name) is not None
def _validate_metric_label_name_token(tok: str) -> None:
"""Raises ValueError if a parsed label name token is invalid.
UTF-8 names must be quoted.
"""
if not tok:
raise ValueError("invalid label name token " + tok)
global _legacy_validation
quoted = tok[0] == '"' and tok[-1] == '"'
if not quoted or _legacy_validation:
if not METRIC_LABEL_NAME_RE.match(tok):
raise ValueError("invalid label name token " + tok)
return
try:
tok.encode('utf-8')
    except UnicodeError:
raise ValueError("invalid label name token " + tok)
def _validate_labelname(l):
"""Raises ValueError if the provided name is not a valid label name.
This check uses the global legacy validation setting to determine the validation scheme.
"""
if get_legacy_validation():
if not METRIC_LABEL_NAME_RE.match(l):
raise ValueError('Invalid label metric name: ' + l)
if RESERVED_METRIC_LABEL_NAME_RE.match(l):
raise ValueError('Reserved label metric name: ' + l)
else:
try:
l.encode('utf-8')
        except UnicodeError:
raise ValueError('Invalid label metric name: ' + l)
if RESERVED_METRIC_LABEL_NAME_RE.match(l):
raise ValueError('Reserved label metric name: ' + l)
def _is_valid_legacy_labelname(l: str) -> bool:
"""Returns true if the provided label name conforms to the legacy validation scheme."""
if METRIC_LABEL_NAME_RE.match(l) is None:
return False
return RESERVED_METRIC_LABEL_NAME_RE.match(l) is None
def _validate_labelnames(cls, labelnames):
"""Raises ValueError if any of the provided names is not a valid label name.
This check uses the global legacy validation setting to determine the validation scheme.
"""
labelnames = tuple(labelnames)
for l in labelnames:
_validate_labelname(l)
if l in cls._reserved_labelnames:
            raise ValueError('Reserved label metric name: ' + l)
return labelnames
def _validate_exemplar(exemplar):
"""Raises ValueError if the exemplar is invalid."""
runes = 0
for k, v in exemplar.items():
_validate_labelname(k)
runes += len(k)
runes += len(v)
if runes > 128:
        raise ValueError(f'Exemplar labels have {runes} UTF-8 characters, exceeding the limit of 128')
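The legacy toggle above can be exercised directly; the environment variable only sets the initial state (a sketch):

enable_legacy_validation()
_validate_metric_name('http_requests_total')  # accepted
try:
    _validate_metric_name('http.requests')    # dots are rejected in legacy mode
except ValueError as e:
    print(e)
disable_legacy_validation()
_validate_metric_name('http.requests')        # any UTF-8 name is accepted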

View File

@@ -0,0 +1,139 @@
import os
from threading import Lock
import warnings
from .mmap_dict import mmap_key, MmapedDict
class MutexValue:
"""A float protected by a mutex."""
_multiprocess = False
def __init__(self, typ, metric_name, name, labelnames, labelvalues, help_text, **kwargs):
self._value = 0.0
self._exemplar = None
self._lock = Lock()
def inc(self, amount):
with self._lock:
self._value += amount
def set(self, value, timestamp=None):
with self._lock:
self._value = value
def set_exemplar(self, exemplar):
with self._lock:
self._exemplar = exemplar
def get(self):
with self._lock:
return self._value
def get_exemplar(self):
with self._lock:
return self._exemplar
def MultiProcessValue(process_identifier=os.getpid):
"""Returns a MmapedValue class based on a process_identifier function.
The 'process_identifier' function MUST comply with this simple rule:
when called in simultaneously running processes it MUST return distinct values.
Using a different function than the default 'os.getpid' is at your own risk.
"""
files = {}
values = []
pid = {'value': process_identifier()}
# Use a single global lock when in multi-processing mode
# as we presume this means there is no threading going on.
    # This avoids the need to also have mutexes in MmapedDict.
lock = Lock()
class MmapedValue:
"""A float protected by a mutex backed by a per-process mmaped file."""
_multiprocess = True
def __init__(self, typ, metric_name, name, labelnames, labelvalues, help_text, multiprocess_mode='', **kwargs):
self._params = typ, metric_name, name, labelnames, labelvalues, help_text, multiprocess_mode
# This deprecation warning can go away in a few releases when removing the compatibility
if 'prometheus_multiproc_dir' in os.environ and 'PROMETHEUS_MULTIPROC_DIR' not in os.environ:
os.environ['PROMETHEUS_MULTIPROC_DIR'] = os.environ['prometheus_multiproc_dir']
warnings.warn("prometheus_multiproc_dir variable has been deprecated in favor of the upper case naming PROMETHEUS_MULTIPROC_DIR", DeprecationWarning)
with lock:
self.__check_for_pid_change()
self.__reset()
values.append(self)
def __reset(self):
typ, metric_name, name, labelnames, labelvalues, help_text, multiprocess_mode = self._params
if typ == 'gauge':
file_prefix = typ + '_' + multiprocess_mode
else:
file_prefix = typ
if file_prefix not in files:
filename = os.path.join(
os.environ.get('PROMETHEUS_MULTIPROC_DIR'),
'{}_{}.db'.format(file_prefix, pid['value']))
files[file_prefix] = MmapedDict(filename)
self._file = files[file_prefix]
self._key = mmap_key(metric_name, name, labelnames, labelvalues, help_text)
self._value, self._timestamp = self._file.read_value(self._key)
def __check_for_pid_change(self):
actual_pid = process_identifier()
if pid['value'] != actual_pid:
pid['value'] = actual_pid
# There has been a fork(), reset all the values.
for f in files.values():
f.close()
files.clear()
for value in values:
value.__reset()
def inc(self, amount):
with lock:
self.__check_for_pid_change()
self._value += amount
self._timestamp = 0.0
self._file.write_value(self._key, self._value, self._timestamp)
def set(self, value, timestamp=None):
with lock:
self.__check_for_pid_change()
self._value = value
self._timestamp = timestamp or 0.0
self._file.write_value(self._key, self._value, self._timestamp)
def set_exemplar(self, exemplar):
# TODO: Implement exemplars for multiprocess mode.
return
def get(self):
with lock:
self.__check_for_pid_change()
return self._value
def get_exemplar(self):
# TODO: Implement exemplars for multiprocess mode.
return None
return MmapedValue
def get_value_class():
# Should we enable multi-process mode?
# This needs to be chosen before the first metric is constructed,
# and as that may be in some arbitrary library the user/admin has
# no control over we use an environment variable.
if 'prometheus_multiproc_dir' in os.environ or 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
return MultiProcessValue()
else:
return MutexValue
ValueClass = get_value_class()
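In the default single-process mode ValueClass is MutexValue; a quick sketch of the shared interface:

v = ValueClass('counter', 'requests_total', 'requests_total', (), (), 'Total requests.')
v.inc(2)
v.inc(3)
print(v.get())  # 5.0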