providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks (#9511)
* providers/scim, sources/ldap: switch to using postgres advisory locks instead of redis locks
* website/integrations: discord: fix typo
* fix timeout logic
* remove redis locks completely
* Apply suggestions from code review

Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>
Signed-off-by: Jens L. <jens@beryju.org>
Co-authored-by: Jens L <jens@goauthentik.io>
Commit fbad02fac1 (parent fbab822db1), committed via GitHub.

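The hunks below replace the Redis-backed redis.lock.Lock with a Postgres advisory lock from django-pglock, and replace the old "is it already locked?" check with a short acquisition timeout plus exception handling. A minimal sketch of the resulting pattern, pieced together from the code in this diff (the lock id and run_sync() are illustrative placeholders, not names from the commit):

import pglock
from django.db.utils import OperationalError

LOCK_ACQUIRE_TIMEOUT = 5  # seconds, mirrors the constant introduced below


def run_sync():
    """Placeholder for the actual sync work."""


def sync_once():
    # The advisory lock lives in Postgres and is held for the duration of the
    # "with" block. side_effect=pglock.Raise turns a failed acquisition (lock
    # still busy after LOCK_ACQUIRE_TIMEOUT seconds) into
    # django.db.utils.OperationalError instead of blocking indefinitely.
    lock = pglock.advisory(
        lock_id="goauthentik.io/example/sync",  # illustrative; real ids embed the tenant schema and the provider pk / source slug
        timeout=LOCK_ACQUIRE_TIMEOUT,
        side_effect=pglock.Raise,
    )
    try:
        with lock:
            run_sync()
    except OperationalError:
        # Another worker already holds the lock: skip this run instead of waiting.
        pass
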
@@ -3,3 +3,4 @@
 PAGE_SIZE = 100
 PAGE_TIMEOUT = 60 * 60 * 0.5  # Half an hour
 HTTP_CONFLICT = 409
+LOCK_ACQUIRE_TIMEOUT = 5

@@ -1,11 +1,11 @@
 from typing import Any, Self
 
-from django.core.cache import cache
+import pglock
+from django.db import connection
 from django.db.models import Model, QuerySet, TextChoices
-from redis.lock import Lock
 
 from authentik.core.models import Group, User
-from authentik.lib.sync.outgoing import PAGE_TIMEOUT
+from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
 from authentik.lib.sync.outgoing.base import BaseOutgoingSyncClient
 
 

@@ -32,10 +32,10 @@ class OutgoingSyncProvider(Model):
         raise NotImplementedError
 
     @property
-    def sync_lock(self) -> Lock:
-        """Redis lock to prevent multiple parallel syncs happening"""
-        return Lock(
-            cache.client.get_client(),
-            name=f"goauthentik.io/providers/outgoing-sync/{str(self.pk)}",
-            timeout=(60 * 60 * PAGE_TIMEOUT) * 3,
+    def sync_lock(self) -> pglock.advisory:
+        """Postgres lock for syncing SCIM to prevent multiple parallel syncs happening"""
+        return pglock.advisory(
+            lock_id=f"goauthentik.io/{connection.schema_name}/providers/outgoing-sync/{str(self.pk)}",
+            timeout=LOCK_ACQUIRE_TIMEOUT,
+            side_effect=pglock.Raise,
         )

@@ -4,6 +4,7 @@ from celery.exceptions import Retry
 from celery.result import allow_join_result
 from django.core.paginator import Paginator
 from django.db.models import Model, QuerySet
+from django.db.utils import OperationalError
 from django.utils.text import slugify
 from django.utils.translation import gettext_lazy as _
 from structlog.stdlib import BoundLogger, get_logger

@@ -64,40 +65,40 @@ class SyncTasks:
         ).first()
         if not provider:
             return
-        lock = provider.sync_lock
-        if lock.locked():
-            self.logger.debug("Sync locked, skipping task", source=provider.name)
-            return
         task.set_uid(slugify(provider.name))
         messages = []
         messages.append(_("Starting full provider sync"))
         self.logger.debug("Starting provider sync")
         users_paginator = Paginator(provider.get_object_qs(User), PAGE_SIZE)
         groups_paginator = Paginator(provider.get_object_qs(Group), PAGE_SIZE)
-        with allow_join_result(), lock:
-            try:
-                for page in users_paginator.page_range:
-                    messages.append(_("Syncing page %(page)d of users" % {"page": page}))
-                    for msg in sync_objects.apply_async(
-                        args=(class_to_path(User), page, provider_pk),
-                        time_limit=PAGE_TIMEOUT,
-                        soft_time_limit=PAGE_TIMEOUT,
-                    ).get():
-                        messages.append(msg)
-                for page in groups_paginator.page_range:
-                    messages.append(_("Syncing page %(page)d of groups" % {"page": page}))
-                    for msg in sync_objects.apply_async(
-                        args=(class_to_path(Group), page, provider_pk),
-                        time_limit=PAGE_TIMEOUT,
-                        soft_time_limit=PAGE_TIMEOUT,
-                    ).get():
-                        messages.append(msg)
-            except TransientSyncException as exc:
-                self.logger.warning("transient sync exception", exc=exc)
-                raise task.retry(exc=exc) from exc
-            except StopSync as exc:
-                task.set_error(exc)
-                return
+        try:
+            with allow_join_result(), provider.sync_lock:
+                try:
+                    for page in users_paginator.page_range:
+                        messages.append(_("Syncing page %(page)d of users" % {"page": page}))
+                        for msg in sync_objects.apply_async(
+                            args=(class_to_path(User), page, provider_pk),
+                            time_limit=PAGE_TIMEOUT,
+                            soft_time_limit=PAGE_TIMEOUT,
+                        ).get():
+                            messages.append(msg)
+                    for page in groups_paginator.page_range:
+                        messages.append(_("Syncing page %(page)d of groups" % {"page": page}))
+                        for msg in sync_objects.apply_async(
+                            args=(class_to_path(Group), page, provider_pk),
+                            time_limit=PAGE_TIMEOUT,
+                            soft_time_limit=PAGE_TIMEOUT,
+                        ).get():
+                            messages.append(msg)
+                except TransientSyncException as exc:
+                    self.logger.warning("transient sync exception", exc=exc)
+                    raise task.retry(exc=exc) from exc
+                except StopSync as exc:
+                    task.set_error(exc)
+                    return
+        except OperationalError:
+            self.logger.debug("Failed to acquire sync lock, skipping", provider=provider.name)
+            return
         task.set_status(TaskStatus.SUCCESSFUL, *messages)
 
     def sync_objects(self, object_type: str, page: int, provider_pk: int):

@@ -60,6 +60,8 @@ SHARED_APPS = [
     "django_filters",
     "drf_spectacular",
     "django_prometheus",
+    "pgactivity",
+    "pglock",
     "channels",
 ]
 TENANT_APPS = [

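Note that django-pglock depends on django-pgactivity (pinned as ">=1.2,<2" in the poetry.lock hunk further down), which is why both "pgactivity" and "pglock" are registered in SHARED_APPS here.
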
@@ -6,19 +6,19 @@ from shutil import rmtree
 from ssl import CERT_REQUIRED
 from tempfile import NamedTemporaryFile, mkdtemp
 
-from django.core.cache import cache
+import pglock
 from django.db import connection, models
 from django.templatetags.static import static
 from django.utils.translation import gettext_lazy as _
 from ldap3 import ALL, NONE, RANDOM, Connection, Server, ServerPool, Tls
 from ldap3.core.exceptions import LDAPException, LDAPInsufficientAccessRightsResult, LDAPSchemaError
-from redis.lock import Lock
 from rest_framework.serializers import Serializer
 
 from authentik.core.models import Group, PropertyMapping, Source
 from authentik.crypto.models import CertificateKeyPair
 from authentik.lib.config import CONFIG
 from authentik.lib.models import DomainlessURLValidator
+from authentik.lib.sync.outgoing import LOCK_ACQUIRE_TIMEOUT
 
 LDAP_TIMEOUT = 15
 

@@ -209,15 +209,12 @@ class LDAPSource(Source):
         return RuntimeError("Failed to bind")
 
     @property
-    def sync_lock(self) -> Lock:
-        """Redis lock for syncing LDAP to prevent multiple parallel syncs happening"""
-        return Lock(
-            cache.client.get_client(),
-            name=f"goauthentik.io/sources/ldap/sync/{connection.schema_name}-{self.slug}",
-            # Convert task timeout hours to seconds, and multiply times 3
-            # (see authentik/sources/ldap/tasks.py:54)
-            # multiply by 3 to add even more leeway
-            timeout=(60 * 60 * CONFIG.get_int("ldap.task_timeout_hours")) * 3,
+    def sync_lock(self) -> pglock.advisory:
+        """Postgres lock for syncing LDAP to prevent multiple parallel syncs happening"""
+        return pglock.advisory(
+            lock_id=f"goauthentik.io/{connection.schema_name}/sources/ldap/sync/{self.slug}",
+            timeout=LOCK_ACQUIRE_TIMEOUT,
+            side_effect=pglock.Raise,
         )
 
     def check_connection(self) -> dict[str, dict[str, str]]:

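The old Redis lock needed a generous expiry (the task timeout in hours converted to seconds, times three) so that a lock left behind by a crashed worker would eventually expire. With the advisory lock, timeout only bounds how long acquisition may wait (LOCK_ACQUIRE_TIMEOUT, 5 seconds); Postgres releases the lock as soon as the holding connection ends, so no expiry headroom is required.
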
@@ -4,8 +4,8 @@ from uuid import uuid4
 
 from celery import chain, group
 from django.core.cache import cache
+from django.db.utils import OperationalError
 from ldap3.core.exceptions import LDAPException
-from redis.exceptions import LockError
 from structlog.stdlib import get_logger
 
 from authentik.events.models import SystemTask as DBSystemTask

@@ -64,12 +64,8 @@ def ldap_sync_single(source_pk: str):
     source: LDAPSource = LDAPSource.objects.filter(pk=source_pk).first()
     if not source:
         return
-    lock = source.sync_lock
-    if lock.locked():
-        LOGGER.debug("LDAP sync locked, skipping task", source=source.slug)
-        return
     try:
-        with lock:
+        with source.sync_lock:
             # Delete all sync tasks from the cache
             DBSystemTask.objects.filter(name="ldap_sync", uid__startswith=source.slug).delete()
             task = chain(

@@ -84,10 +80,8 @@ def ldap_sync_single(source_pk: str):
                 ),
             )
             task()
-    except LockError:
-        # This should never happen, we check if the lock is locked above so this
-        # would only happen if there was some other timeout
-        LOGGER.debug("Failed to acquire lock for LDAP sync", source=source.slug)
+    except OperationalError:
+        LOGGER.debug("Failed to acquire lock for LDAP sync, skipping task", source=source.slug)
 
 
 def ldap_sync_paginator(source: LDAPSource, sync: type[BaseLDAPSynchronizer]) -> list:

poetry.lock (generated file, 33 changed lines)

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"

@@ -1236,6 +1236,35 @@ files = [
 [package.dependencies]
 Django = ">=3.2"
 
+[[package]]
+name = "django-pgactivity"
+version = "1.4.1"
+description = "Monitor, kill, and analyze Postgres queries."
+optional = false
+python-versions = "<4,>=3.8.0"
+files = [
+    {file = "django_pgactivity-1.4.1-py3-none-any.whl", hash = "sha256:e7affa4dc08e7650092a582375729081362a3103f1148e34e8406ddf114eeb95"},
+    {file = "django_pgactivity-1.4.1.tar.gz", hash = "sha256:00da0f0156daa37f5f113c7a6d9378a6f6d111e44f20d3b30b367d5428e18b07"},
+]
+
+[package.dependencies]
+django = ">=3"
+
+[[package]]
+name = "django-pglock"
+version = "1.5.1"
+description = "Postgres locking routines and lock table access."
+optional = false
+python-versions = "<4,>=3.8.0"
+files = [
+    {file = "django_pglock-1.5.1-py3-none-any.whl", hash = "sha256:d3b977922abbaffd43968714b69cdab7453866adf2b0695fb497491748d7bc67"},
+    {file = "django_pglock-1.5.1.tar.gz", hash = "sha256:291903d5d877b68558003e1d64d764ebd5590344ba3b7aa1d5127df5947869b1"},
+]
+
+[package.dependencies]
+django = ">=3"
+django-pgactivity = ">=1.2,<2"
+
 [[package]]
 name = "django-prometheus"
 version = "2.3.1"

@@ -5303,4 +5332,4 @@
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.12"
-content-hash = "6399c90a2adca3e7119277bf4d0649fe0826d5fb4454a23b1b1fad3e64a1fe90"
+content-hash = "112c777b6cf6bec7583f3994cd1fa0165d046d09a2f378990ba6bb626f9739ca"

@@ -97,6 +97,7 @@ django = "*"
 django-filter = "*"
 django-guardian = "*"
 django-model-utils = "*"
+django-pglock = "*"
 django-prometheus = "*"
 django-redis = "*"
 django-storages = { extras = ["s3"], version = "*" }