Skip to content

Commit

Permalink
feat(maintenance): Per-endpoint maintenance configuration (#982)
Browse files Browse the repository at this point in the history
* feat: add endpoint.maintenance-windows array for per-endpoint maintenance configuration

* doc: initial entry for maintenance windows in endpoint config

* doc: example documentation for per-endpoint configuration of maintenance
windows

* chore: var => :=

* test: add checks for maintenance window defaults in endpoint configuration

* chore: clean up new-lines

---------

Co-authored-by: TwiN <[email protected]>
  • Loading branch information
alexmaras and TwiN authored Feb 17, 2025
1 parent 7e122a9 commit a1f7bd7
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 5 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ You can then configure alerts to be triggered when an endpoint is unhealthy once
| `endpoints[].ssh.username` | SSH username (e.g. example). | Required `""` |
| `endpoints[].ssh.password` | SSH password (e.g. password). | Required `""` |
| `endpoints[].alerts` | List of all alerts for a given endpoint. <br />See [Alerting](#alerting). | `[]` |
| `endpoints[].maintenance-windows` | List of all maintenance windows for a given endpoint. <br />See [Maintenance](#maintenance). | `[]` |
| `endpoints[].client` | [Client configuration](#client-configuration). | `{}` |
| `endpoints[].ui` | UI configuration at the endpoint level. | `{}` |
| `endpoints[].ui.hide-conditions` | Whether to hide conditions from the results. Note that this only hides conditions from results evaluated from the moment this was enabled. | `false` |
Expand Down Expand Up @@ -1710,6 +1711,19 @@ maintenance:
- Monday
- Thursday
```
You can also specify maintenance windows on a per-endpoint basis:
```yaml
endpoints:
  - name: endpoint-1
    url: "https://example.org"
    maintenance-windows:
      - start: "07:30"
        duration: 40m
        timezone: "Europe/Berlin"
      - start: "14:30"
        duration: 1h
        timezone: "Europe/Berlin"
```


### Security
Expand Down
9 changes: 9 additions & 0 deletions config/endpoint/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strings"
"time"

"github.com/TwiN/gatus/v5/config/maintenance"
"github.com/TwiN/gatus/v5/alerting/alert"
"github.com/TwiN/gatus/v5/client"
"github.com/TwiN/gatus/v5/config/endpoint/dns"
Expand Down Expand Up @@ -104,6 +105,9 @@ type Endpoint struct {
// Alerts is the alerting configuration for the endpoint in case of failure
Alerts []*alert.Alert `yaml:"alerts,omitempty"`

// MaintenanceWindows is the configuration for per-endpoint maintenance windows
MaintenanceWindows []*maintenance.Config `yaml:"maintenance-windows,omitempty"`

// DNSConfig is the configuration for DNS monitoring
DNSConfig *dns.Config `yaml:"dns,omitempty"`

Expand Down Expand Up @@ -219,6 +223,11 @@ func (e *Endpoint) ValidateAndSetDefaults() error {
if e.Type() == TypeUNKNOWN {
return ErrUnknownEndpointType
}
for _, maintenanceWindow := range e.MaintenanceWindows {
if err := maintenanceWindow.ValidateAndSetDefaults(); err != nil {
return err
}
}
// Make sure that the request can be created
_, err := http.NewRequest(e.Method, e.URL, bytes.NewBuffer([]byte(e.Body)))
if err != nil {
Expand Down
19 changes: 15 additions & 4 deletions config/endpoint/endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/TwiN/gatus/v5/config/endpoint/dns"
"github.com/TwiN/gatus/v5/config/endpoint/ssh"
"github.com/TwiN/gatus/v5/config/endpoint/ui"
"github.com/TwiN/gatus/v5/config/maintenance"
"github.com/TwiN/gatus/v5/test"
)

Expand Down Expand Up @@ -390,10 +391,11 @@ func TestEndpoint_Type(t *testing.T) {

func TestEndpoint_ValidateAndSetDefaults(t *testing.T) {
endpoint := Endpoint{
Name: "website-health",
URL: "https://twin.sh/health",
Conditions: []Condition{Condition("[STATUS] == 200")},
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}},
Name: "website-health",
URL: "https://twin.sh/health",
Conditions: []Condition{Condition("[STATUS] == 200")},
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}},
MaintenanceWindows: []*maintenance.Config{{Start: "03:50", Duration: 4 * time.Hour}},
}
if err := endpoint.ValidateAndSetDefaults(); err != nil {
t.Errorf("Expected no error, got %v", err)
Expand Down Expand Up @@ -432,6 +434,15 @@ func TestEndpoint_ValidateAndSetDefaults(t *testing.T) {
if endpoint.Alerts[0].FailureThreshold != 3 {
t.Error("Endpoint alert should've defaulted to a failure threshold of 3")
}
if len(endpoint.MaintenanceWindows) != 1 {
t.Error("Endpoint should've had 1 maintenance window")
}
if !endpoint.MaintenanceWindows[0].IsEnabled() {
t.Error("Endpoint maintenance should've defaulted to true")
}
if endpoint.MaintenanceWindows[0].Timezone != "UTC" {
t.Error("Endpoint maintenance should've defaulted to UTC")
}
}

func TestEndpoint_ValidateAndSetDefaultsWithInvalidCondition(t *testing.T) {
Expand Down
9 changes: 8 additions & 1 deletion watchdog/watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,14 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
} else {
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
}
if !maintenanceConfig.IsUnderMaintenance() {
inEndpointMaintenanceWindow := false
for _, maintenanceWindow := range ep.MaintenanceWindows {
if maintenanceWindow.IsUnderMaintenance() {
logr.Debug("[watchdog.execute] Under endpoint maintenance window")
inEndpointMaintenanceWindow = true
}
}
if !maintenanceConfig.IsUnderMaintenance() && !inEndpointMaintenanceWindow {
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
HandleAlerting(ep, result, alertingConfig)
} else {
Expand Down

0 comments on commit a1f7bd7

Please sign in to comment.