outposts: make metrics compliant with Prometheus best-practices (#6398)

web/outpost: make metrics compliant with Prometheus best-practices

Today, all NewHistogramVec store values in nanoseconds without changing
the default histogram bucket, which are made for seconds, making them
a bit useless. In addition, some metrics names are not self-explanatoryand
and do not comply with Prometheus best practices.

This commit tries to fix all of this "issues".

NOTE: I kept old metrics in order to avoid breaking changes with
existing dashboards and metrics.

Signed-off-by: Alexandre NICOLAIE <xunleii@users.noreply.github.com>
This commit is contained in:
Alexandre NICOLAIE
2023-07-27 18:51:08 +02:00
committed by GitHub
parent 5347dd7022
commit a2714ab1f1
17 changed files with 235 additions and 16 deletions

View File

@ -32,6 +32,11 @@ func (rs *RadiusServer) Handle_AccessRequest(w radius.ResponseWriter, r *RadiusR
"reason": "flow_error",
"app": r.pi.appSlug,
}).Inc()
metrics.RequestsRejectedLegacy.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"reason": "flow_error",
"app": r.pi.appSlug,
}).Inc()
_ = w.Write(r.Response(radius.CodeAccessReject))
return
}
@ -41,6 +46,11 @@ func (rs *RadiusServer) Handle_AccessRequest(w radius.ResponseWriter, r *RadiusR
"reason": "invalid_credentials",
"app": r.pi.appSlug,
}).Inc()
metrics.RequestsRejectedLegacy.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"reason": "invalid_credentials",
"app": r.pi.appSlug,
}).Inc()
_ = w.Write(r.Response(radius.CodeAccessReject))
return
}
@ -53,6 +63,11 @@ func (rs *RadiusServer) Handle_AccessRequest(w radius.ResponseWriter, r *RadiusR
"reason": "access_check_fail",
"app": r.pi.appSlug,
}).Inc()
metrics.RequestsRejectedLegacy.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"reason": "access_check_fail",
"app": r.pi.appSlug,
}).Inc()
return
}
if !access {
@ -63,6 +78,11 @@ func (rs *RadiusServer) Handle_AccessRequest(w radius.ResponseWriter, r *RadiusR
"reason": "access_denied",
"app": r.pi.appSlug,
}).Inc()
metrics.RequestsRejectedLegacy.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"reason": "access_denied",
"app": r.pi.appSlug,
}).Inc()
return
}
_ = w.Write(r.Response(radius.CodeAccessAccept))

View File

@ -2,6 +2,7 @@ package radius
import (
"crypto/sha512"
"time"
"github.com/getsentry/sentry-go"
"github.com/google/uuid"
@ -45,6 +46,10 @@ func (rs *RadiusServer) ServeRADIUS(w radius.ResponseWriter, r *radius.Request)
metrics.Requests.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"app": selectedApp,
}).Observe(float64(span.EndTime.Sub(span.StartTime)) / float64(time.Second))
metrics.RequestsLegacy.With(prometheus.Labels{
"outpost_name": rs.ac.Outpost.Name,
"app": selectedApp,
}).Observe(float64(span.EndTime.Sub(span.StartTime)))
}()

View File

@ -15,10 +15,20 @@ import (
var (
Requests = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "authentik_outpost_radius_request_duration_seconds",
Help: "RADIUS request latencies in seconds",
}, []string{"outpost_name", "app"})
RequestsRejected = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "authentik_outpost_radius_requests_rejected_total",
Help: "Total number of rejected requests",
}, []string{"outpost_name", "reason", "app"})
// NOTE: the following metric is kept for compatibility purpose
RequestsLegacy = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "authentik_outpost_radius_requests",
Help: "The total number of successful requests",
}, []string{"outpost_name", "app"})
RequestsRejected = promauto.NewCounterVec(prometheus.CounterOpts{
RequestsRejectedLegacy = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "authentik_outpost_radius_requests_rejected",
Help: "Total number of rejected requests",
}, []string{"outpost_name", "reason", "app"})