Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for custom labels in Prometheus metrics #979

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
19 changes: 19 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,25 @@ type Config struct {
lastFileModTime time.Time // last modification time
}

// GetMetricLabels returns the unique label names declared by all enabled
// endpoints in the configuration, sorted in ascending order.
//
// Disabled endpoints are ignored. The result is sorted because Go randomizes
// map iteration order: without a stable order, two calls could return the same
// names in different positions, and callers that pair these names with
// positional label values (such as the Prometheus metrics registration and
// publication code) would attach values to the wrong label.
//
// The returned slice is never nil; it is empty when no enabled endpoint
// declares a label.
func (config *Config) GetMetricLabels() []string {
	labels := make([]string, 0)
	for _, ep := range config.Endpoints {
		if !ep.IsEnabled() {
			continue
		}
		for label := range ep.Labels {
			// Find the sorted insertion point. A linear scan keeps this
			// dependency-free; label sets are presumably small.
			pos := 0
			for pos < len(labels) && labels[pos] < label {
				pos++
			}
			if pos < len(labels) && labels[pos] == label {
				continue // already collected from another endpoint
			}
			// Grow by one, shift the tail right and drop the label into place.
			labels = append(labels, "")
			copy(labels[pos+1:], labels[pos:])
			labels[pos] = label
		}
	}
	return labels
}

func (config *Config) GetEndpointByKey(key string) *endpoint.Endpoint {
for i := 0; i < len(config.Endpoints); i++ {
ep := config.Endpoints[i]
Expand Down
119 changes: 115 additions & 4 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ endpoints:
for _, scenario := range scenarios {
t.Run(scenario.name, func(t *testing.T) {
for path, content := range scenario.pathAndFiles {
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0644); err != nil {
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0o644); err != nil {
t.Fatalf("[%s] failed to write file: %v", scenario.name, err)
}
}
Expand Down Expand Up @@ -282,7 +282,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
url: https://twin.sh/health
conditions:
- "[STATUS] == 200"
`), 0644)
`), 0o644)

t.Run("config-file-as-config-path", func(t *testing.T) {
config, err := LoadConfiguration(configFilePath)
Expand All @@ -298,7 +298,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
- name: website
url: https://twin.sh/health
conditions:
- "[STATUS] == 200"`), 0644); err != nil {
- "[STATUS] == 200"`), 0o644); err != nil {
t.Fatalf("failed to overwrite config file: %v", err)
}
if !config.HasLoadedConfigurationBeenModified() {
Expand All @@ -315,7 +315,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
}
time.Sleep(time.Second) // Because the file mod time only has second precision, we have to wait for a second
// Update the config file
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0644); err != nil {
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0o644); err != nil {
t.Fatalf("failed to overwrite config file: %v", err)
}
if !config.HasLoadedConfigurationBeenModified() {
Expand Down Expand Up @@ -1938,3 +1938,114 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
})
}
}

func TestConfig_GetMetricLabels(t *testing.T) {
tests := []struct {
name string
config *Config
expected []string
}{
{
name: "no-endpoints",
config: &Config{
Endpoints: []*endpoint.Endpoint{},
},
expected: []string{},
},
{
name: "single-endpoint-no-labels",
config: &Config{
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.com",
},
},
},
expected: []string{},
},
{
name: "single-endpoint-with-labels",
config: &Config{
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.com",
Enabled: toPtr(true),
Labels: map[string]string{
"env": "production",
"team": "backend",
},
},
},
},
expected: []string{"env", "team"},
},
{
name: "multiple-endpoints-with-labels",
config: &Config{
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.com",
Enabled: toPtr(true),
Labels: map[string]string{
"env": "production",
"team": "backend",
"module": "auth",
},
},
{
Name: "endpoint2",
URL: "https://example.org",
Enabled: toPtr(true),
Labels: map[string]string{
"env": "staging",
"team": "frontend",
},
},
},
},
expected: []string{"env", "team", "module"},
},
{
name: "multiple-endpoints-with-some-disabled",
config: &Config{
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.com",
Enabled: toPtr(true),
Labels: map[string]string{
"env": "production",
"team": "backend",
},
},
{
Name: "endpoint2",
URL: "https://example.org",
Enabled: toPtr(false),
Labels: map[string]string{
"module": "auth",
},
},
},
},
expected: []string{"env", "team"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
labels := tt.config.GetMetricLabels()
if len(labels) != len(tt.expected) {
t.Errorf("expected %d labels, got %d", len(tt.expected), len(labels))
}
for _, label := range tt.expected {
if !contains(labels, label) {
t.Errorf("expected label %s to be present", label)
}
}
})
}
}
6 changes: 4 additions & 2 deletions config/endpoint/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ type Endpoint struct {
// Headers of the request
Headers map[string]string `yaml:"headers,omitempty"`

// Labels are key-value pairs exposed as additional labels on the endpoint's metrics
Labels map[string]string `yaml:"labels,omitempty"`

// Interval is the duration to wait between every status check
Interval time.Duration `yaml:"interval,omitempty"`

Expand Down Expand Up @@ -374,8 +377,7 @@ func (e *Endpoint) call(result *Result) {
} else if endpointType == TypeSSH {
// If there's no username/password specified, attempt to validate just the SSH banner
if len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0 {
result.Connected, result.HTTPStatus, err =
client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
result.Connected, result.HTTPStatus, err = client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
if err != nil {
result.AddError(err.Error())
return
Expand Down
16 changes: 16 additions & 0 deletions config/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package config

// toPtr copies value into freshly allocated storage and returns its address,
// which is handy for populating optional pointer fields from literals.
func toPtr[T any](value T) *T {
	ptr := new(T)
	*ptr = value
	return ptr
}

// contains reports whether key is equal to at least one element of slice.
// A nil or empty slice contains nothing.
func contains[T comparable](slice []T, key T) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == key {
			return true
		}
	}
	return false
}
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/TwiN/gatus/v5/config"
"github.com/TwiN/gatus/v5/controller"
"github.com/TwiN/gatus/v5/metrics"
"github.com/TwiN/gatus/v5/storage/store"
"github.com/TwiN/gatus/v5/watchdog"
"github.com/TwiN/logr"
Expand Down Expand Up @@ -49,6 +50,7 @@ func main() {

// start launches the application's components for the given configuration.
// The calls are order-sensitive and run as follows: the controller is started
// in its own goroutine, the Prometheus metrics are initialized from cfg,
// watchdog.Monitor is invoked, and listenToConfigurationFileChanges
// (presumably the configuration file watcher) is started in a goroutine.
func start(cfg *config.Config) {
go controller.Handle(cfg)
// NOTE(review): InitializePrometheusMetrics creates its collectors through
// promauto, which panics if a collector with the same name is registered
// twice. If start can run more than once in a process (e.g. after a
// configuration reload, not visible here), confirm this does not re-register.
metrics.InitializePrometheusMetrics(cfg)
watchdog.Monitor(cfg)
go listenToConfigurationFileChanges(cfg)
}
Expand Down
47 changes: 26 additions & 21 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package metrics
import (
"strconv"

"github.com/TwiN/gatus/v5/config"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
Expand All @@ -11,8 +12,6 @@ import (
const namespace = "gatus" // The prefix of the metrics

var (
initializedMetrics bool // Whether the metrics have been initialized

resultTotal *prometheus.CounterVec
resultDurationSeconds *prometheus.GaugeVec
resultConnectedTotal *prometheus.CounterVec
Expand All @@ -21,64 +20,70 @@ var (
resultEndpointSuccess *prometheus.GaugeVec
)

func initializePrometheusMetrics() {
func InitializePrometheusMetrics(cfg *config.Config) {
labels := cfg.GetMetricLabels()
resultTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Name: "results_total",
Help: "Number of results per endpoint",
}, []string{"key", "group", "name", "type", "success"})
}, append([]string{"key", "group", "name", "type", "success"}, labels...))
resultDurationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_duration_seconds",
Help: "Duration of the request in seconds",
}, []string{"key", "group", "name", "type"})
}, append([]string{"key", "group", "name", "type"}, labels...))
resultConnectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Name: "results_connected_total",
Help: "Total number of results in which a connection was successfully established",
}, []string{"key", "group", "name", "type"})
}, append([]string{"key", "group", "name", "type"}, labels...))
resultCodeTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Name: "results_code_total",
Help: "Total number of results by code",
}, []string{"key", "group", "name", "type", "code"})
}, append([]string{"key", "group", "name", "type", "code"}, labels...))
resultCertificateExpirationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_certificate_expiration_seconds",
Help: "Number of seconds until the certificate expires",
}, []string{"key", "group", "name", "type"})
}, append([]string{"key", "group", "name", "type"}, labels...))
resultEndpointSuccess = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_endpoint_success",
Help: "Displays whether or not the endpoint was a success",
}, []string{"key", "group", "name", "type"})
}, append([]string{"key", "group", "name", "type"}, labels...))
}

// PublishMetricsForEndpoint publishes metrics for the given endpoint and its result.
// These metrics will be exposed at /metrics if the metrics are enabled
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result) {
if !initializedMetrics {
initializePrometheusMetrics()
initializedMetrics = true
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result, labels []string) {
labelValues := []string{}
for _, label := range labels {
if value, ok := ep.Labels[label]; ok {
labelValues = append(labelValues, value)
} else {
labelValues = append(labelValues, "")
}
}

endpointType := ep.Type()
resultTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)).Inc()
resultDurationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.Duration.Seconds())
resultTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)}, labelValues...)...).Inc()
resultDurationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.Duration.Seconds())
if result.Connected {
resultConnectedTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Inc()
resultConnectedTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Inc()
}
if result.DNSRCode != "" {
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode).Inc()
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode}, labelValues...)...).Inc()
}
if result.HTTPStatus != 0 {
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)).Inc()
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)}, labelValues...)...).Inc()
}
if result.CertificateExpiration != 0 {
resultCertificateExpirationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.CertificateExpiration.Seconds())
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
}
if result.Success {
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(1)
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
} else {
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(0)
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(0)
}
}
19 changes: 12 additions & 7 deletions metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ import (
"testing"
"time"

"github.com/TwiN/gatus/v5/config"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/gatus/v5/config/endpoint/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestPublishMetricsForEndpoint(t *testing.T) {
InitializePrometheusMetrics(&config.Config{})

httpEndpoint := &endpoint.Endpoint{Name: "http-ep-name", Group: "http-ep-group", URL: "https://example.org"}
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
HTTPStatus: 200,
Expand All @@ -23,7 +26,7 @@ func TestPublishMetricsForEndpoint(t *testing.T) {
},
Success: true,
CertificateExpiration: 49 * time.Hour,
})
}, []string{})
err := testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
# HELP gatus_results_code_total Total number of results by code
# TYPE gatus_results_code_total counter
Expand Down Expand Up @@ -57,7 +60,7 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
},
Success: false,
CertificateExpiration: 47 * time.Hour,
})
}, []string{})
err = testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
# HELP gatus_results_code_total Total number of results by code
# TYPE gatus_results_code_total counter
Expand All @@ -82,10 +85,12 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
if err != nil {
t.Errorf("Expected no errors but got: %v", err)
}
dnsEndpoint := &endpoint.Endpoint{Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
QueryType: "A",
QueryName: "example.com.",
}}
dnsEndpoint := &endpoint.Endpoint{
Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
QueryType: "A",
QueryName: "example.com.",
},
}
PublishMetricsForEndpoint(dnsEndpoint, &endpoint.Result{
DNSRCode: "NOERROR",
Connected: true,
Expand All @@ -94,7 +99,7 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
{Condition: "[DNS_RCODE] == NOERROR", Success: true},
},
Success: true,
})
}, []string{})
err = testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
# HELP gatus_results_code_total Total number of results by code
# TYPE gatus_results_code_total counter
Expand Down
Loading