Skip to content

Commit

Permalink
Let partition capacity use issue evaluation.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gerrit91 committed Oct 9, 2023
1 parent 92b4a4f commit 500f1f5
Show file tree
Hide file tree
Showing 23 changed files with 178 additions and 194 deletions.
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/asn-uniqueness.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ type (
}
)

func (i *IssueASNUniqueness) Spec() *issueSpec {
return &issueSpec{
func (i *IssueASNUniqueness) Spec() *spec {
return &spec{
Type: IssueTypeASNUniqueness,
Severity: IssueSeverityMinor,
Description: "The ASN is not unique (only impact on firewalls)",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/bmc-info-outdated.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ func (i *IssueBMCInfoOutdated) Evaluate(m metal.Machine, ec metal.ProvisioningEv
return false
}

func (*IssueBMCInfoOutdated) Spec() *issueSpec {
return &issueSpec{
func (*IssueBMCInfoOutdated) Spec() *spec {
return &spec{
Type: IssueTypeBMCInfoOutdated,
Severity: IssueSeverityMajor,
Description: "BMC has not been updated from either metal-hammer or metal-bmc",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/bmc-without-ip.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueBMCWithoutIP struct{}
)

func (i *IssueBMCWithoutIP) Spec() *issueSpec {
return &issueSpec{
func (i *IssueBMCWithoutIP) Spec() *spec {
return &spec{
Type: IssueTypeBMCWithoutIP,
Severity: IssueSeverityMajor,
Description: "BMC has no ip address",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/bmc-without-mac.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueBMCWithoutMAC struct{}
)

func (i *IssueBMCWithoutMAC) Spec() *issueSpec {
return &issueSpec{
func (i *IssueBMCWithoutMAC) Spec() *spec {
return &spec{
Type: IssueTypeBMCWithoutMAC,
Severity: IssueSeverityMajor,
Description: "BMC has no mac address",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/crash-loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ type (
IssueCrashLoop struct{}
)

func (i *IssueCrashLoop) Spec() *issueSpec {
return &issueSpec{
func (i *IssueCrashLoop) Spec() *spec {
return &spec{
Type: IssueTypeCrashLoop,
Severity: IssueSeverityMajor,
Description: "machine is in a provisioning crash loop (⭕)",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/failed-machine-reclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ type (
IssueFailedMachineReclaim struct{}
)

func (i *IssueFailedMachineReclaim) Spec() *issueSpec {
return &issueSpec{
func (i *IssueFailedMachineReclaim) Spec() *spec {
return &spec{
Type: IssueTypeFailedMachineReclaim,
Severity: IssueSeverityCritical,
Description: "machine phones home but not allocated",
Expand Down
26 changes: 15 additions & 11 deletions cmd/metal-api/internal/issues/issues.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,20 @@ type (
// MachineIssues is a list of machines together with their machine issues
MachineIssues []*MachineWithIssues

machineIssueMap map[*metal.Machine]Issues
MachineIssuesMap map[*metal.Machine]Issues

issueImpl interface {
issue interface {
// Evaluate decides whether a given machine has the machine issue.
// the third argument contains additional information that may be required for the issue evaluation
Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool
// Spec returns the issue spec of this issue.
Spec() *issueSpec
Spec() *spec
// Details returns additional information on the issue after the evaluation.
Details() string
}

// issueSpec defines the specification of an issue.
issueSpec struct {
// spec defines the specification of an issue.
spec struct {
Type IssueType
Severity IssueSeverity
Description string
Expand All @@ -74,7 +74,7 @@ func AllIssues() Issues {
return res
}

func toIssue(i issueImpl) Issue {
func toIssue(i issue) Issue {
return Issue{
Type: i.Spec().Type,
Severity: i.Spec().Severity,
Expand All @@ -84,8 +84,12 @@ func toIssue(i issueImpl) Issue {
}
}

func FindIssues(c *IssueConfig) (MachineIssues, error) {
res := machineIssueMap{}
func FindIssues(c *IssueConfig) (MachineIssuesMap, error) {
if c.LastErrorThreshold == 0 {
c.LastErrorThreshold = DefaultLastErrorThreshold()
}

res := MachineIssuesMap{}

ecs := c.EventContainers.ByID()

Expand Down Expand Up @@ -114,7 +118,7 @@ func FindIssues(c *IssueConfig) (MachineIssues, error) {
}
}

return res.toList(), nil
return res, nil
}

func (mis MachineIssues) Get(id string) *MachineWithIssues {
Expand Down Expand Up @@ -161,7 +165,7 @@ func (c *IssueConfig) includeIssue(t IssueType) bool {
return true
}

func (mim machineIssueMap) add(m metal.Machine, issue Issue) {
func (mim MachineIssuesMap) add(m metal.Machine, issue Issue) {
issues, ok := mim[&m]
if !ok {
issues = Issues{}
Expand All @@ -170,7 +174,7 @@ func (mim machineIssueMap) add(m metal.Machine, issue Issue) {
mim[&m] = issues
}

func (mim machineIssueMap) toList() MachineIssues {
func (mim MachineIssuesMap) ToList() MachineIssues {
var res MachineIssues

for m, issues := range mim {
Expand Down
2 changes: 1 addition & 1 deletion cmd/metal-api/internal/issues/issues_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func TestFindIssues(t *testing.T) {
want = tt.want(ms)
}

if diff := cmp.Diff(want, got, cmp.AllowUnexported(IssueLastEventError{}, IssueASNUniqueness{}, IssueNonDistinctBMCIP{})); diff != "" {
if diff := cmp.Diff(want, got.ToList(), cmp.AllowUnexported(IssueLastEventError{}, IssueASNUniqueness{}, IssueNonDistinctBMCIP{})); diff != "" {
t.Errorf("diff (+got -want):\n %s", diff)
}
})
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/last-event-error.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ func DefaultLastErrorThreshold() time.Duration {
return 7 * 24 * time.Hour
}

func (i *IssueLastEventError) Spec() *issueSpec {
return &issueSpec{
func (i *IssueLastEventError) Spec() *spec {
return &spec{
Type: IssueTypeLastEventError,
Severity: IssueSeverityMinor,
Description: "the machine had an error during the provisioning lifecycle",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/liveliness-dead.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueLivelinessDead struct{}
)

func (i *IssueLivelinessDead) Spec() *issueSpec {
return &issueSpec{
func (i *IssueLivelinessDead) Spec() *spec {
return &spec{
Type: IssueTypeLivelinessDead,
Severity: IssueSeverityMajor,
Description: "the machine is not sending events anymore",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/liveliness-not-available.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueLivelinessNotAvailable struct{}
)

func (i *IssueLivelinessNotAvailable) Spec() *issueSpec {
return &issueSpec{
func (i *IssueLivelinessNotAvailable) Spec() *spec {
return &spec{
Type: IssueTypeLivelinessNotAvailable,
Severity: IssueSeverityMinor,
Description: "the machine liveliness is not available",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/liveliness-unknown.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueLivelinessUnknown struct{}
)

func (i *IssueLivelinessUnknown) Spec() *issueSpec {
return &issueSpec{
func (i *IssueLivelinessUnknown) Spec() *spec {
return &spec{
Type: IssueTypeLivelinessUnknown,
Severity: IssueSeverityMajor,
Description: "the machine is not sending LLDP alive messages anymore",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/no-event-container.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ type (
IssueNoEventContainer struct{}
)

func (i *IssueNoEventContainer) Spec() *issueSpec {
return &issueSpec{
func (i *IssueNoEventContainer) Spec() *spec {
return &spec{
Type: IssueTypeNoEventContainer,
Severity: IssueSeverityMajor,
Description: "machine has no event container",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/no-partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ type (
IssueNoPartition struct{}
)

func (i *IssueNoPartition) Spec() *issueSpec {
return &issueSpec{
func (i *IssueNoPartition) Spec() *spec {
return &spec{
Type: IssueTypeNoPartition,
Severity: IssueSeverityMajor,
Description: "machine with no partition",
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/issues/non-distinct-bmc-ip.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ type (
}
)

func (i *IssueNonDistinctBMCIP) Spec() *issueSpec {
return &issueSpec{
func (i *IssueNonDistinctBMCIP) Spec() *spec {
return &spec{
Type: IssueTypeNonDistinctBMCIP,
Description: "BMC IP address is not distinct",
RefURL: "https://docs.metal-stack.io/stable/installation/troubleshoot/#bmc-no-distinct-ip",
Expand Down
2 changes: 1 addition & 1 deletion cmd/metal-api/internal/issues/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func AllIssueTypes() []IssueType {
}
}

func NewIssueFromType(t IssueType) (issueImpl, error) {
func NewIssueFromType(t IssueType) (issue, error) {
switch t {
case IssueTypeNoPartition:
return &IssueNoPartition{}, nil
Expand Down
4 changes: 2 additions & 2 deletions cmd/metal-api/internal/metal/provisioning.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ var (
type ProvisioningEvents []ProvisioningEvent

// Is returns true if the given event is equal to this ProvisioningEventType
func (p ProvisioningEventType) Is(event string) bool {
return string(p) == event
func (p ProvisioningEventType) Is(event ProvisioningEventType) bool {
return p == event
}

// TrimEvents trim the events to maxCount
Expand Down
2 changes: 1 addition & 1 deletion cmd/metal-api/internal/metal/provisioning_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
func TestProvisioningEventType_Is(t *testing.T) {
tests := []struct {
name string
event string
event ProvisioningEventType
p ProvisioningEventType
want bool
}{
Expand Down
29 changes: 4 additions & 25 deletions cmd/metal-api/internal/service/machine-service.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ func (r *machineResource) issues(request *restful.Request, response *restful.Res
return
}

issues, err := issues.FindIssues(&issues.IssueConfig{
machinesWithIssues, err := issues.FindIssues(&issues.IssueConfig{
Machines: ms,
EventContainers: ecs,
Severity: severity,
Expand All @@ -607,7 +607,7 @@ func (r *machineResource) issues(request *restful.Request, response *restful.Res
}

var issueResponse []*v1.MachineIssueResponse
for _, machineWithIssues := range issues {
for _, machineWithIssues := range machinesWithIssues.ToList() {
machineWithIssues := machineWithIssues

entry := &v1.MachineIssueResponse{
Expand Down Expand Up @@ -1939,7 +1939,7 @@ func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (met
provisioningEvents, err := ds.FindProvisioningEventContainer(m.ID)
if err != nil {
// we have no provisioning events... we cannot tell
return metal.MachineLivelinessUnknown, fmt.Errorf("no provisioningEvents found for ID: %s", m.ID)
return metal.MachineLivelinessUnknown, fmt.Errorf("no provisioning event container found for machine: %s", m.ID)
}

old := *provisioningEvents
Expand All @@ -1956,6 +1956,7 @@ func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (met
} else {
provisioningEvents.Liveliness = metal.MachineLivelinessAlive
}

err = ds.UpdateProvisioningEventContainer(&old, provisioningEvents)
if err != nil {
return provisioningEvents.Liveliness, err
Expand Down Expand Up @@ -2013,7 +2014,6 @@ func ResurrectMachines(ctx context.Context, ds *datastore.RethinkStore, publishe
}
continue
}

}

logger.Info("finished machine resurrection")
Expand Down Expand Up @@ -2238,27 +2238,6 @@ func publishMachineCmd(logger *zap.SugaredLogger, m *metal.Machine, publisher bu
return nil
}

// machineHasIssues reports whether the given machine response matches one of
// the issue conditions checked below. The conditions are evaluated in order
// and the first match wins:
//
//   - the machine has no partition
//   - the machine liveliness is not "alive"
//   - the machine is not allocated, yet its most recent provisioning event is
//     a phoned-home event
//   - the machine is flagged as crash looping or as a failed machine reclaim
//     and its most recent provisioning event is not a waiting event
func machineHasIssues(m *v1.MachineResponse) bool {
	if m.Partition == nil || !metal.MachineLivelinessAlive.Is(m.Liveliness) {
		return true
	}

	recent := m.RecentProvisioningEvents

	switch {
	case m.Allocation == nil && len(recent.Events) > 0 && metal.ProvisioningEventPhonedHome.Is(recent.Events[0].Event):
		// not allocated, but phones home
		return true
	case recent.CrashLoop || recent.FailedMachineReclaim:
		// Machines in crash loop but in "Waiting" state are considered available
		return len(recent.Events) > 0 && !metal.ProvisioningEventWaiting.Is(recent.Events[0].Event)
	}

	return false
}

func makeMachineResponse(m *metal.Machine, ds *datastore.RethinkStore) (*v1.MachineResponse, error) {
s, p, i, ec, err := findMachineReferencedEntities(m, ds)
if err != nil {
Expand Down
Loading

0 comments on commit 500f1f5

Please sign in to comment.