providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks (#9511)

* providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* website/integrations: discord: fix typo

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* fix timeout logic

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* remove redis locks completely

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* Apply suggestions from code review

Signed-off-by: Jens L. <jens@beryju.org>

---------

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>
Signed-off-by: Jens L. <jens@beryju.org>
Co-authored-by: Jens L <jens@goauthentik.io>
This commit is contained in:
Marc 'risson' Schmitt
2024-05-23 13:41:42 +02:00
committed by GitHub
parent fbab822db1
commit fbad02fac1
8 changed files with 85 additions and 60 deletions

View File

@ -3,3 +3,4 @@
PAGE_SIZE = 100 PAGE_SIZE = 100
PAGE_TIMEOUT = 60 * 60 * 0.5 # Half an hour PAGE_TIMEOUT = 60 * 60 * 0.5 # Half an hour
HTTP_CONFLICT = 409 HTTP_CONFLICT = 409
LOCK_ACQUIRE_TIMEOUT = 5

View File

@ -1,11 +1,11 @@
from typing import Any, Self from typing import Any, Self
from django.core.cache import cache import pglock
from django.db import connection
from django.db.models import Model, QuerySet, TextChoices from django.db.models import Model, QuerySet, TextChoices
from redis.lock import Lock
from authentik.core.models import Group, User from authentik.core.models import Group, User
from authentik.lib.sync.outgoing import PAGE_TIMEOUT from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
from authentik.lib.sync.outgoing.base import BaseOutgoingSyncClient from authentik.lib.sync.outgoing.base import BaseOutgoingSyncClient
@ -32,10 +32,10 @@ class OutgoingSyncProvider(Model):
raise NotImplementedError raise NotImplementedError
@property @property
def sync_lock(self) -> Lock: def sync_lock(self) -> pglock.advisory:
"""Redis lock to prevent multiple parallel syncs happening""" """Postgres lock for syncing SCIM to prevent multiple parallel syncs happening"""
return Lock( return pglock.advisory(
cache.client.get_client(), lock_id=f"goauthentik.io/{connection.schema_name}/providers/outgoing-sync/{str(self.pk)}",
name=f"goauthentik.io/providers/outgoing-sync/{str(self.pk)}", timeout=LOCK_ACQUIRE_TIMEOUT,
timeout=(60 * 60 * PAGE_TIMEOUT) * 3, side_effect=pglock.Raise,
) )

View File

@ -4,6 +4,7 @@ from celery.exceptions import Retry
from celery.result import allow_join_result from celery.result import allow_join_result
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.db.models import Model, QuerySet from django.db.models import Model, QuerySet
from django.db.utils import OperationalError
from django.utils.text import slugify from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from structlog.stdlib import BoundLogger, get_logger from structlog.stdlib import BoundLogger, get_logger
@ -64,40 +65,40 @@ class SyncTasks:
).first() ).first()
if not provider: if not provider:
return return
lock = provider.sync_lock
if lock.locked():
self.logger.debug("Sync locked, skipping task", source=provider.name)
return
task.set_uid(slugify(provider.name)) task.set_uid(slugify(provider.name))
messages = [] messages = []
messages.append(_("Starting full provider sync")) messages.append(_("Starting full provider sync"))
self.logger.debug("Starting provider sync") self.logger.debug("Starting provider sync")
users_paginator = Paginator(provider.get_object_qs(User), PAGE_SIZE) users_paginator = Paginator(provider.get_object_qs(User), PAGE_SIZE)
groups_paginator = Paginator(provider.get_object_qs(Group), PAGE_SIZE) groups_paginator = Paginator(provider.get_object_qs(Group), PAGE_SIZE)
with allow_join_result(), lock: try:
try: with allow_join_result(), provider.sync_lock:
for page in users_paginator.page_range: try:
messages.append(_("Syncing page %(page)d of users" % {"page": page})) for page in users_paginator.page_range:
for msg in sync_objects.apply_async( messages.append(_("Syncing page %(page)d of users" % {"page": page}))
args=(class_to_path(User), page, provider_pk), for msg in sync_objects.apply_async(
time_limit=PAGE_TIMEOUT, args=(class_to_path(User), page, provider_pk),
soft_time_limit=PAGE_TIMEOUT, time_limit=PAGE_TIMEOUT,
).get(): soft_time_limit=PAGE_TIMEOUT,
messages.append(msg) ).get():
for page in groups_paginator.page_range: messages.append(msg)
messages.append(_("Syncing page %(page)d of groups" % {"page": page})) for page in groups_paginator.page_range:
for msg in sync_objects.apply_async( messages.append(_("Syncing page %(page)d of groups" % {"page": page}))
args=(class_to_path(Group), page, provider_pk), for msg in sync_objects.apply_async(
time_limit=PAGE_TIMEOUT, args=(class_to_path(Group), page, provider_pk),
soft_time_limit=PAGE_TIMEOUT, time_limit=PAGE_TIMEOUT,
).get(): soft_time_limit=PAGE_TIMEOUT,
messages.append(msg) ).get():
except TransientSyncException as exc: messages.append(msg)
self.logger.warning("transient sync exception", exc=exc) except TransientSyncException as exc:
raise task.retry(exc=exc) from exc self.logger.warning("transient sync exception", exc=exc)
except StopSync as exc: raise task.retry(exc=exc) from exc
task.set_error(exc) except StopSync as exc:
return task.set_error(exc)
return
except OperationalError:
self.logger.debug("Failed to acquire sync lock, skipping", provider=provider.name)
return
task.set_status(TaskStatus.SUCCESSFUL, *messages) task.set_status(TaskStatus.SUCCESSFUL, *messages)
def sync_objects(self, object_type: str, page: int, provider_pk: int): def sync_objects(self, object_type: str, page: int, provider_pk: int):

View File

@ -60,6 +60,8 @@ SHARED_APPS = [
"django_filters", "django_filters",
"drf_spectacular", "drf_spectacular",
"django_prometheus", "django_prometheus",
"pgactivity",
"pglock",
"channels", "channels",
] ]
TENANT_APPS = [ TENANT_APPS = [

View File

@ -6,19 +6,19 @@ from shutil import rmtree
from ssl import CERT_REQUIRED from ssl import CERT_REQUIRED
from tempfile import NamedTemporaryFile, mkdtemp from tempfile import NamedTemporaryFile, mkdtemp
from django.core.cache import cache import pglock
from django.db import connection, models from django.db import connection, models
from django.templatetags.static import static from django.templatetags.static import static
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from ldap3 import ALL, NONE, RANDOM, Connection, Server, ServerPool, Tls from ldap3 import ALL, NONE, RANDOM, Connection, Server, ServerPool, Tls
from ldap3.core.exceptions import LDAPException, LDAPInsufficientAccessRightsResult, LDAPSchemaError from ldap3.core.exceptions import LDAPException, LDAPInsufficientAccessRightsResult, LDAPSchemaError
from redis.lock import Lock
from rest_framework.serializers import Serializer from rest_framework.serializers import Serializer
from authentik.core.models import Group, PropertyMapping, Source from authentik.core.models import Group, PropertyMapping, Source
from authentik.crypto.models import CertificateKeyPair from authentik.crypto.models import CertificateKeyPair
from authentik.lib.config import CONFIG from authentik.lib.config import CONFIG
from authentik.lib.models import DomainlessURLValidator from authentik.lib.models import DomainlessURLValidator
from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
LDAP_TIMEOUT = 15 LDAP_TIMEOUT = 15
@ -209,15 +209,12 @@ class LDAPSource(Source):
return RuntimeError("Failed to bind") return RuntimeError("Failed to bind")
@property @property
def sync_lock(self) -> Lock: def sync_lock(self) -> pglock.advisory:
"""Redis lock for syncing LDAP to prevent multiple parallel syncs happening""" """Postgres lock for syncing LDAP to prevent multiple parallel syncs happening"""
return Lock( return pglock.advisory(
cache.client.get_client(), lock_id=f"goauthentik.io/{connection.schema_name}/sources/ldap/sync/{self.slug}",
name=f"goauthentik.io/sources/ldap/sync/{connection.schema_name}-{self.slug}", timeout=LOCK_ACQUIRE_TIMEOUT,
# Convert task timeout hours to seconds, and multiply times 3 side_effect=pglock.Raise,
# (see authentik/sources/ldap/tasks.py:54)
# multiply by 3 to add even more leeway
timeout=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3,
) )
def check_connection(self) -> dict[str, dict[str, str]]: def check_connection(self) -> dict[str, dict[str, str]]:

View File

@ -4,8 +4,8 @@ from uuid import uuid4
from celery import chain, group from celery import chain, group
from django.core.cache import cache from django.core.cache import cache
from django.db.utils import OperationalError
from ldap3.core.exceptions import LDAPException from ldap3.core.exceptions import LDAPException
from redis.exceptions import LockError
from structlog.stdlib import get_logger from structlog.stdlib import get_logger
from authentik.events.models import SystemTask as DBSystemTask from authentik.events.models import SystemTask as DBSystemTask
@ -64,12 +64,8 @@ def ldap_sync_single(source_pk: str):
source: LDAPSource = LDAPSource.objects.filter(pk=source_pk).first() source: LDAPSource = LDAPSource.objects.filter(pk=source_pk).first()
if not source: if not source:
return return
lock = source.sync_lock
if lock.locked():
LOGGER.debug("LDAP sync locked, skipping task", source=source.slug)
return
try: try:
with lock: with source.sync_lock:
# Delete all sync tasks from the cache # Delete all sync tasks from the cache
DBSystemTask.objects.filter(name="ldap_sync", uid__startswith=source.slug).delete() DBSystemTask.objects.filter(name="ldap_sync", uid__startswith=source.slug).delete()
task = chain( task = chain(
@ -84,10 +80,8 @@ def ldap_sync_single(source_pk: str):
), ),
) )
task() task()
except LockError: except OperationalError:
# This should never happen, we check if the lock is locked above so this LOGGER.debug("Failed to acquire lock for LDAP sync, skipping task", source=source.slug)
# would only happen if there was some other timeout
LOGGER.debug("Failed to acquire lock for LDAP sync", source=source.slug)
def ldap_sync_paginator(source: LDAPSource, sync: type[BaseLDAPSynchronizer]) -> list: def ldap_sync_paginator(source: LDAPSource, sync: type[BaseLDAPSynchronizer]) -> list:

33
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]] [[package]]
name = "aiohttp" name = "aiohttp"
@ -1236,6 +1236,35 @@ files = [
[package.dependencies] [package.dependencies]
Django = ">=3.2" Django = ">=3.2"
[[package]]
name = "django-pgactivity"
version = "1.4.1"
description = "Monitor, kill, and analyze Postgres queries."
optional = false
python-versions = "<4,>=3.8.0"
files = [
{file = "django_pgactivity-1.4.1-py3-none-any.whl", hash = "sha256:e7affa4dc08e7650092a582375729081362a3103f1148e34e8406ddf114eeb95"},
{file = "django_pgactivity-1.4.1.tar.gz", hash = "sha256:00da0f0156daa37f5f113c7a6d9378a6f6d111e44f20d3b30b367d5428e18b07"},
]
[package.dependencies]
django = ">=3"
[[package]]
name = "django-pglock"
version = "1.5.1"
description = "Postgres locking routines and lock table access."
optional = false
python-versions = "<4,>=3.8.0"
files = [
{file = "django_pglock-1.5.1-py3-none-any.whl", hash = "sha256:d3b977922abbaffd43968714b69cdab7453866adf2b0695fb497491748d7bc67"},
{file = "django_pglock-1.5.1.tar.gz", hash = "sha256:291903d5d877b68558003e1d64d764ebd5590344ba3b7aa1d5127df5947869b1"},
]
[package.dependencies]
django = ">=3"
django-pgactivity = ">=1.2,<2"
[[package]] [[package]]
name = "django-prometheus" name = "django-prometheus"
version = "2.3.1" version = "2.3.1"
@ -5303,4 +5332,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "~3.12" python-versions = "~3.12"
content-hash = "6399c90a2adca3e7119277bf4d0649fe0826d5fb4454a23b1b1fad3e64a1fe90" content-hash = "112c777b6cf6bec7583f3994cd1fa0165d046d09a2f378990ba6bb626f9739ca"

View File

@ -97,6 +97,7 @@ django = "*"
django-filter = "*" django-filter = "*"
django-guardian = "*" django-guardian = "*"
django-model-utils = "*" django-model-utils = "*"
django-pglock = "*"
django-prometheus = "*" django-prometheus = "*"
django-redis = "*" django-redis = "*"
django-storages = { extras = ["s3"], version = "*" } django-storages = { extras = ["s3"], version = "*" }