lifecycle: cleanup prometheus (#2972)

* remove high cardinality labels

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* retry worker number for prometheus multiprocess id

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* revert to pid, use subdirectories

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* cleanup more

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* use worker id based off of https://github.com/benoitc/gunicorn/issues/1352

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* fix missing app label

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* tests/e2e: remove static names

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* fix

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
This commit is contained in:
Jens L
2022-05-29 21:45:25 +02:00
committed by GitHub
parent 9f2529c886
commit 3eb466ff4b
19 changed files with 129 additions and 148 deletions

View File

@ -37,11 +37,6 @@ MODE_FILE="/tmp/authentik-mode"
if [[ "$1" == "server" ]]; then
wait_for_db
echo "server" > $MODE_FILE
# We only set PROMETHEUS_MULTIPROC_DIR for server, as with the worker it just fills up the disk
# as one file is created per process
#
# Set to TMPDIR instead hardcoded path so this can be used outside docker too
export PROMETHEUS_MULTIPROC_DIR=$TMPDIR
python -m lifecycle.migrate
/authentik-proxy
elif [[ "$1" == "worker" ]]; then

View File

@ -3,15 +3,23 @@ import os
import pwd
from hashlib import sha512
from multiprocessing import cpu_count
from os import makedirs
from pathlib import Path
from tempfile import gettempdir
from typing import TYPE_CHECKING
import structlog
from kubernetes.config.incluster_config import SERVICE_HOST_ENV_NAME
from prometheus_client.values import MultiProcessValue
from authentik import get_full_version
from authentik.lib.config import CONFIG
from authentik.lib.utils.http import get_http_session
from authentik.lib.utils.reflection import get_env
from lifecycle.worker import DjangoUvicornWorker
if TYPE_CHECKING:
from gunicorn.arbiter import Arbiter
bind = "127.0.0.1:8000"
@ -22,19 +30,27 @@ try:
except KeyError:
pass
# Base temporary directory; the worker and prometheus directories below are created under it.
_tmp = Path(gettempdir())
worker_class = "lifecycle.worker.DjangoUvicornWorker"
# NOTE(review): worker_tmp_dir is assigned twice and the second assignment wins. This looks
# like the removed and the added line of a diff shown together - confirm which one should stay.
worker_tmp_dir = gettempdir()
worker_tmp_dir = str(_tmp.joinpath("authentik_worker_tmp"))
prometheus_tmp_dir = str(_tmp.joinpath("authentik_prometheus_tmp"))
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "authentik.root.settings")
# setdefault: a PROMETHEUS_MULTIPROC_DIR that is already exported is left untouched.
os.environ.setdefault("PROMETHEUS_MULTIPROC_DIR", prometheus_tmp_dir)
# Create both directories up front; exist_ok avoids an error when they already exist.
makedirs(worker_tmp_dir, exist_ok=True)
makedirs(prometheus_tmp_dir, exist_ok=True)
# Presumably gunicorn's worker-recycling settings - TODO confirm against the gunicorn docs.
max_requests = 1000
max_requests_jitter = 50
# DEBUG flag from the project config (False when unset); used to pick the log formatter.
_debug = CONFIG.y_bool("DEBUG", False)
logconfig_dict = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"json_formatter": {
"json": {
"()": structlog.stdlib.ProcessorFormatter,
"processor": structlog.processors.JSONRenderer(),
"foreign_pre_chain": [
@ -43,14 +59,20 @@ logconfig_dict = {
structlog.processors.TimeStamper(),
structlog.processors.StackInfoRenderer(),
],
}
},
"console": {
"()": structlog.stdlib.ProcessorFormatter,
"processor": structlog.dev.ConsoleRenderer(colors=True),
"foreign_pre_chain": [
structlog.stdlib.add_log_level,
structlog.stdlib.add_logger_name,
structlog.processors.TimeStamper(),
structlog.processors.StackInfoRenderer(),
],
},
},
"handlers": {
"error_console": {
"class": "logging.StreamHandler",
"formatter": "json_formatter",
},
"console": {"class": "logging.StreamHandler", "formatter": "json_formatter"},
"console": {"class": "logging.StreamHandler", "formatter": "json" if _debug else "console"},
},
"loggers": {
"uvicorn": {"handlers": ["console"], "level": "WARNING", "propagate": False},
@ -69,11 +91,54 @@ workers = int(os.environ.get("WORKERS", default_workers))
threads = int(os.environ.get("THREADS", 4))
# pylint: disable=unused-argument
def worker_exit(server, worker):
def post_fork(server: "Arbiter", worker: DjangoUvicornWorker):
    """Key prometheus multiprocess values on the worker id rather than the PID.

    Replaces ``prometheus_client.values.ValueClass`` with a
    ``MultiProcessValue`` whose identifier callable returns
    ``worker._worker_id``.
    """
    from prometheus_client import values

    def _worker_identifier():
        # Looked up at call time, so it always reflects the id stored on the worker.
        return worker._worker_id

    values.ValueClass = MultiProcessValue(_worker_identifier)
# pylint: disable=unused-argument
def worker_exit(server: "Arbiter", worker: DjangoUvicornWorker):
"""Remove pid dbs when worker is shutdown"""
# Imported at call time rather than at module import.
from prometheus_client import multiprocess
# NOTE(review): two calls follow, one keyed on worker.pid and one on worker._worker_id.
# This looks like the removed and the added line of a diff shown together - confirm that
# only the _worker_id call is meant to remain.
multiprocess.mark_process_dead(worker.pid)
multiprocess.mark_process_dead(worker._worker_id)
def on_starting(server: "Arbiter"):
    """Initialise the pool of worker ids that may be handed out a second time.

    The pool starts empty; it is consulted when ids have to be shared
    briefly, e.g. on reloads when each worker exists twice.
    """
    reusable_ids = set()
    server._worker_id_overload = reusable_ids
def nworkers_changed(server: "Arbiter", new_value, old_value):
    """Remember the current worker count on the server object.

    Also called on startup. The count is stored separately because raising
    the worker count temporarily via TTIN does not update
    ``server.cfg.workers``; if one of those extra workers died, the range
    of ids in use would otherwise be unknown.
    """
    # old_value is part of the hook signature but is not needed here.
    setattr(server, "_worker_id_current_workers", new_value)
def _next_worker_id(server: "Arbiter"):
"""If there are IDs open for re-use, take one. Else look for a free one."""
if server._worker_id_overload:
return server._worker_id_overload.pop()
in_use = set(w._worker_id for w in tuple(server.WORKERS.values()) if w.alive)
free = set(range(1, server._worker_id_current_workers + 1)) - in_use
return free.pop()
def on_reload(server: "Arbiter"):
    """Queue one full set of ids (1..cfg.workers) so each can be re-used once."""
    configured_workers = server.cfg.workers
    server._worker_id_overload = {*range(1, configured_workers + 1)}
def pre_fork(server: "Arbiter", worker: DjangoUvicornWorker):
    """Assign the next free worker id to the worker before it is forked."""
    assigned_id = _next_worker_id(server)
    worker._worker_id = assigned_id
if not CONFIG.y_bool("disable_startup_analytics", False):