providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks (#9511)

* providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* website/integrations: discord: fix typo

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* fix timeout logic

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* remove redis locks completely

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>

* Apply suggestions from code review

Signed-off-by: Jens L. <jens@beryju.org>

---------

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>
Signed-off-by: Jens L. <jens@beryju.org>
Co-authored-by: Jens L <jens@goauthentik.io>
This commit is contained in:
Marc 'risson' Schmitt
2024-05-23 13:41:42 +02:00
committed by GitHub
parent fbab822db1
commit fbad02fac1
8 changed files with 85 additions and 60 deletions

View File

@@ -3,3 +3,4 @@
PAGE_SIZE = 100
PAGE_TIMEOUT = 60 * 60 * 0.5 # Half an hour
HTTP_CONFLICT = 409
LOCK_ACQUIRE_TIMEOUT = 5

View File

@@ -1,11 +1,11 @@
from typing import Any, Self
from django.core.cache import cache
import pglock
from django.db import connection
from django.db.models import Model, QuerySet, TextChoices
from redis.lock import Lock
from authentik.core.models import Group, User
from authentik.lib.sync.outgoing import PAGE_TIMEOUT
from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
from authentik.lib.sync.outgoing.base import BaseOutgoingSyncClient
@@ -32,10 +32,10 @@ class OutgoingSyncProvider(Model):
raise NotImplementedError
@property
def sync_lock(self) -> Lock:
"""Redis lock to prevent multiple parallel syncs happening"""
return Lock(
cache.client.get_client(),
name=f"goauthentik.io/providers/outgoing-sync/{str(self.pk)}",
timeout=(60 * 60 * PAGE_TIMEOUT) * 3,
def sync_lock(self) -> pglock.advisory:
"""Postgres lock for syncing SCIM to prevent multiple parallel syncs happening"""
return pglock.advisory(
lock_id=f"goauthentik.io/{connection.schema_name}/providers/outgoing-sync/{str(self.pk)}",
timeout=LOCK_ACQUIRE_TIMEOUT,
side_effect=pglock.Raise,
)

View File

@@ -4,6 +4,7 @@ from celery.exceptions import Retry
from celery.result import allow_join_result
from django.core.paginator import Paginator
from django.db.models import Model, QuerySet
from django.db.utils import OperationalError
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from structlog.stdlib import BoundLogger, get_logger
@@ -64,40 +65,40 @@ class SyncTasks:
).first()
if not provider:
return
lock = provider.sync_lock
if lock.locked():
self.logger.debug("Sync locked, skipping task", source=provider.name)
return
task.set_uid(slugify(provider.name))
messages = []
messages.append(_("Starting full provider sync"))
self.logger.debug("Starting provider sync")
users_paginator = Paginator(provider.get_object_qs(User), PAGE_SIZE)
groups_paginator = Paginator(provider.get_object_qs(Group), PAGE_SIZE)
with allow_join_result(), lock:
try:
for page in users_paginator.page_range:
messages.append(_("Syncing page %(page)d of users" % {"page": page}))
for msg in sync_objects.apply_async(
args=(class_to_path(User), page, provider_pk),
time_limit=PAGE_TIMEOUT,
soft_time_limit=PAGE_TIMEOUT,
).get():
messages.append(msg)
for page in groups_paginator.page_range:
messages.append(_("Syncing page %(page)d of groups" % {"page": page}))
for msg in sync_objects.apply_async(
args=(class_to_path(Group), page, provider_pk),
time_limit=PAGE_TIMEOUT,
soft_time_limit=PAGE_TIMEOUT,
).get():
messages.append(msg)
except TransientSyncException as exc:
self.logger.warning("transient sync exception", exc=exc)
raise task.retry(exc=exc) from exc
except StopSync as exc:
task.set_error(exc)
return
try:
with allow_join_result(), provider.sync_lock:
try:
for page in users_paginator.page_range:
messages.append(_("Syncing page %(page)d of users" % {"page": page}))
for msg in sync_objects.apply_async(
args=(class_to_path(User), page, provider_pk),
time_limit=PAGE_TIMEOUT,
soft_time_limit=PAGE_TIMEOUT,
).get():
messages.append(msg)
for page in groups_paginator.page_range:
messages.append(_("Syncing page %(page)d of groups" % {"page": page}))
for msg in sync_objects.apply_async(
args=(class_to_path(Group), page, provider_pk),
time_limit=PAGE_TIMEOUT,
soft_time_limit=PAGE_TIMEOUT,
).get():
messages.append(msg)
except TransientSyncException as exc:
self.logger.warning("transient sync exception", exc=exc)
raise task.retry(exc=exc) from exc
except StopSync as exc:
task.set_error(exc)
return
except OperationalError:
self.logger.debug("Failed to acquire sync lock, skipping", provider=provider.name)
return
task.set_status(TaskStatus.SUCCESSFUL, *messages)
def sync_objects(self, object_type: str, page: int, provider_pk: int):

View File

@@ -60,6 +60,8 @@ SHARED_APPS = [
"django_filters",
"drf_spectacular",
"django_prometheus",
"pgactivity",
"pglock",
"channels",
]
TENANT_APPS = [

View File

@@ -6,19 +6,19 @@ from shutil import rmtree
from ssl import CERT_REQUIRED
from tempfile import NamedTemporaryFile, mkdtemp
from django.core.cache import cache
import pglock
from django.db import connection, models
from django.templatetags.static import static
from django.utils.translation import gettext_lazy as _
from ldap3 import ALL, NONE, RANDOM, Connection, Server, ServerPool, Tls
from ldap3.core.exceptions import LDAPException, LDAPInsufficientAccessRightsResult, LDAPSchemaError
from redis.lock import Lock
from rest_framework.serializers import Serializer
from authentik.core.models import Group, PropertyMapping, Source
from authentik.crypto.models import CertificateKeyPair
from authentik.lib.config import CONFIG
from authentik.lib.models import DomainlessURLValidator
from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
LDAP_TIMEOUT = 15
@@ -209,15 +209,12 @@ class LDAPSource(Source):
return RuntimeError("Failed to bind")
@property
def sync_lock(self) -> Lock:
"""Redis lock for syncing LDAP to prevent multiple parallel syncs happening"""
return Lock(
cache.client.get_client(),
name=f"goauthentik.io/sources/ldap/sync/{connection.schema_name}-{self.slug}",
# Convert task timeout hours to seconds, and multiply times 3
# (see authentik/sources/ldap/tasks.py:54)
# multiply by 3 to add even more leeway
timeout=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3,
def sync_lock(self) -> pglock.advisory:
"""Postgres lock for syncing LDAP to prevent multiple parallel syncs happening"""
return pglock.advisory(
lock_id=f"goauthentik.io/{connection.schema_name}/sources/ldap/sync/{self.slug}",
timeout=LOCK_ACQUIRE_TIMEOUT,
side_effect=pglock.Raise,
)
def check_connection(self) -> dict[str, dict[str, str]]:

View File

@@ -4,8 +4,8 @@ from uuid import uuid4
from celery import chain, group
from django.core.cache import cache
from django.db.utils import OperationalError
from ldap3.core.exceptions import LDAPException
from redis.exceptions import LockError
from structlog.stdlib import get_logger
from authentik.events.models import SystemTask as DBSystemTask
@@ -64,12 +64,8 @@ def ldap_sync_single(source_pk: str):
source: LDAPSource = LDAPSource.objects.filter(pk=source_pk).first()
if not source:
return
lock = source.sync_lock
if lock.locked():
LOGGER.debug("LDAP sync locked, skipping task", source=source.slug)
return
try:
with lock:
with source.sync_lock:
# Delete all sync tasks from the cache
DBSystemTask.objects.filter(name="ldap_sync", uid__startswith=source.slug).delete()
task = chain(
@@ -84,10 +80,8 @@
),
)
task()
except LockError:
# This should never happen, we check if the lock is locked above so this
# would only happen if there was some other timeout
LOGGER.debug("Failed to acquire lock for LDAP sync", source=source.slug)
except OperationalError:
LOGGER.debug("Failed to acquire lock for LDAP sync, skipping task", source=source.slug)
def ldap_sync_paginator(source: LDAPSource, sync: type[BaseLDAPSynchronizer]) -> list:

33
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "aiohttp"
@@ -1236,6 +1236,35 @@ files = [
[package.dependencies]
Django = ">=3.2"
[[package]]
name = "django-pgactivity"
version = "1.4.1"
description = "Monitor, kill, and analyze Postgres queries."
optional = false
python-versions = "<4,>=3.8.0"
files = [
{file = "django_pgactivity-1.4.1-py3-none-any.whl", hash = "sha256:e7affa4dc08e7650092a582375729081362a3103f1148e34e8406ddf114eeb95"},
{file = "django_pgactivity-1.4.1.tar.gz", hash = "sha256:00da0f0156daa37f5f113c7a6d9378a6f6d111e44f20d3b30b367d5428e18b07"},
]
[package.dependencies]
django = ">=3"
[[package]]
name = "django-pglock"
version = "1.5.1"
description = "Postgres locking routines and lock table access."
optional = false
python-versions = "<4,>=3.8.0"
files = [
{file = "django_pglock-1.5.1-py3-none-any.whl", hash = "sha256:d3b977922abbaffd43968714b69cdab7453866adf2b0695fb497491748d7bc67"},
{file = "django_pglock-1.5.1.tar.gz", hash = "sha256:291903d5d877b68558003e1d64d764ebd5590344ba3b7aa1d5127df5947869b1"},
]
[package.dependencies]
django = ">=3"
django-pgactivity = ">=1.2,<2"
[[package]]
name = "django-prometheus"
version = "2.3.1"
@@ -5303,4 +5332,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "~3.12"
content-hash = "6399c90a2adca3e7119277bf4d0649fe0826d5fb4454a23b1b1fad3e64a1fe90"
content-hash = "112c777b6cf6bec7583f3994cd1fa0165d046d09a2f378990ba6bb626f9739ca"

View File

@@ -97,6 +97,7 @@ django = "*"
django-filter = "*"
django-guardian = "*"
django-model-utils = "*"
django-pglock = "*"
django-prometheus = "*"
django-redis = "*"
django-storages = { extras = ["s3"], version = "*" }