From 0a9dcbcd7dfe44f228c2c73ae4e83e54599c88c7 Mon Sep 17 00:00:00 2001 From: "Nathan J. Mehl" Date: Thu, 30 Jan 2025 14:03:48 -0500 Subject: [PATCH] Expose issuer cert TTL via log and prometheus Add a prometheus gauge function in the identity package that exposes the current TTL in seconds of the issuer certificate. When a new issuer certificate is loaded, log its NotAfter time in unix epoch format, along with the current process wall clock time. This addresses https://github.com/linkerd/linkerd2/issues/11215 Signed-off-by: Nathan J. Mehl --- pkg/identity/service.go | 43 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/pkg/identity/service.go b/pkg/identity/service.go index b56492ecf5eca..3abff9ab0d550 100644 --- a/pkg/identity/service.go +++ b/pkg/identity/service.go @@ -17,6 +17,7 @@ import ( pb "github.com/linkerd/linkerd2-proxy-api/go/identity" "github.com/linkerd/linkerd2/pkg/tls" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -50,6 +51,8 @@ type ( recordEvent func(parent runtime.Object, eventType, reason, message string) expectedName, issuerPathCrt, issuerPathKey string + issuerCertExpiration time.Time + issuerCertExpireGauge prometheus.GaugeFunc } // Validator implementors accept a bearer token, validates it, and returns a @@ -92,6 +95,14 @@ func (svc *Service) updateIssuer(newIssuer tls.Issuer) { svc.issuerMutex.Unlock() } +func (svc *Service) getIssuerCertValidityRemaining() float64 { + if svc.issuer == nil { + log.Warn("Certificate issuer is not ready; cannot get expiration") + return float64(0) + } + return time.Since(svc.issuerCertExpiration).Seconds() +} + // Run reads from the issuer and error channels and reloads the issuer certs when necessary func (svc *Service) Run(issuerEvent <-chan struct{}, issuerError <-chan error) { for { @@ -131,13 +142,20 @@ func (svc *Service) loadCredentials() (tls.Issuer, error) { return nil, fmt.Errorf("failed to verify issuer certificate: it must be an intermediate-CA, but it is not") } + svc.issuerCertExpiration = creds.Certificate.NotAfter + log.Debugf("Loaded issuer cert: %s", creds.EncodeCertificatePEM()) + log.WithFields(log.Fields{ + "invalid_after": creds.Certificate.NotAfter.Unix(), + "process_clock_time": time.Now().Unix(), + }).Info("Issuer cert loaded") return tls.NewCA(*creds, *svc.validity), nil } // NewService creates a new identity service. func NewService(validator Validator, trustAnchors *x509.CertPool, validity *tls.Validity, recordEvent func(parent runtime.Object, eventType, reason, message string), expectedName, issuerPathCrt, issuerPathKey string) *Service { - return &Service{ + + svc := &Service{ pb.UnimplementedIdentityServer{}, validator, trustAnchors, @@ -148,7 +166,26 @@ func NewService(validator Validator, trustAnchors *x509.CertPool, validity *tls. expectedName, issuerPathCrt, issuerPathKey, + time.Time{}, + nil, + } + + issuerCertExpireGauge := prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "issuer_cert_remaining_seconds", + Help: "The remaining seconds until the issuer certificate expires", + }, svc.getIssuerCertValidityRemaining) + + if err := prometheus.Register(issuerCertExpireGauge); err != nil { + var are prometheus.AlreadyRegisteredError + if errors.As(err, &are) { + svc.issuerCertExpireGauge = are.ExistingCollector.(prometheus.GaugeFunc) + } else { + panic(err) + } + } else { + svc.issuerCertExpireGauge = issuerCertExpireGauge } + return svc } // Register registers an identity service implementation in the provided gRPC @@ -170,6 +207,10 @@ func (svc *Service) ensureIssuerStillValid() error { } } +func (svc *Service) UnregisterGauges() { + prometheus.Unregister(svc.issuerCertExpireGauge) +} + // Certify validates identity and signs certificates. func (svc *Service) Certify(ctx context.Context, req *pb.CertifyRequest) (*pb.CertifyResponse, error) { svc.issuerMutex.RLock()