From b8206b2f742606ed6033f4d083597b95a9f16480 Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Tue, 23 Sep 2025 17:58:42 +0200 Subject: [PATCH] Fix hostname parsing and add tests --- CHANGES.rst | 2 + utils/dc_util/dc_util.go | 54 ++++++++- utils/dc_util/decommission_test.go | 179 +++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+), 4 deletions(-) create mode 100644 utils/dc_util/decommission_test.go diff --git a/CHANGES.rst b/CHANGES.rst index 6231d8f4..cdac4c85 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,8 @@ Changelog Unreleased ---------- +* Fix hostname parsing and add tests in dc_util. + 2.52.1 (2025-09-15) ------------------- diff --git a/utils/dc_util/dc_util.go b/utils/dc_util/dc_util.go index b1b42f32..a4b420e9 100644 --- a/utils/dc_util/dc_util.go +++ b/utils/dc_util/dc_util.go @@ -29,6 +29,48 @@ const ( defaultTimeout = "7200s" ) +type customError struct{ msg string } + +func (e *customError) Error() string { return e.msg } + +// ErrMalformedHostname is returned when hostname cannot be parsed +var ErrMalformedHostname = &customError{"malformed hostname"} + +// splitHostname splits hostname by "-" +func splitHostname(hostname string) []string { + return strings.Split(hostname, "-") +} + +// makeDecommissionStmt creates the decommission statement +func makeDecommissionStmt(nodeName string) string { + return fmt.Sprintf("alter cluster decommission '%s'", nodeName) +} + +// extractNodeName extracts the CrateDB node name from hostname +func extractNodeName(hostname, crateNodePrefix, defaultPrefix string) (string, error) { + parts := splitHostname(hostname) + + // If custom prefix provided (not default), use it + if crateNodePrefix != defaultPrefix && crateNodePrefix != "" { + if len(parts) > 0 { + podNumber := parts[len(parts)-1] + return crateNodePrefix + "-" + podNumber, nil + } + return "", ErrMalformedHostname + } + + // Extract from hostname if using default prefix + if len(parts) > 3 && parts[0] == "crate" { + prefixParts := parts[1 : len(parts)-2] + podNumber := parts[len(parts)-1] + if len(prefixParts) > 0 && podNumber != "" { + return strings.Join(prefixParts, "-") + "-" + podNumber, nil + } + } + + return "", ErrMalformedHostname +} + func sendSQLStatement(proto, stmt string) error { payload := map[string]string{"stmt": stmt} payloadBytes, err := json.Marshal(payload) @@ -159,6 +201,12 @@ func run() error { } statefulSetName := strings.Join(hostnameParts[:len(hostnameParts)-1], "-") + // Determine crateNodeName using extracted logic + actualCrateNodeName, err := extractNodeName(hostname, crateNodePrefix, defaultCrateNodePrefix) + if err != nil { + return fmt.Errorf("failed to extract node name from hostname %s: %w", hostname, err) + } + ctx := context.Background() statefulSet, err := clientset.AppsV1().StatefulSets(namespace).Get(ctx, statefulSetName, metav1.GetOptions{}) if err != nil { @@ -171,16 +219,14 @@ func run() error { time.Sleep(2 * time.Second) // Sleep to catch up with the replica settings if replicas > 0 { - podNumber := hostnameParts[len(hostnameParts)-1] - // Send the SQL statements to decommission the node - log.Printf("Decommissioning node %s with graceful_stop.timeout of %s", podNumber, decommissionTimeout) + log.Printf("Decommissioning node %s with graceful_stop.timeout of %s", actualCrateNodeName, decommissionTimeout) statements := []string{ fmt.Sprintf(`set global transient "cluster.graceful_stop.timeout" = '%s';`, decommissionTimeout), `set global transient "cluster.graceful_stop.force" = True;`, fmt.Sprintf(`set global transient "cluster.graceful_stop.min_availability"='%s';`, minAvailability), - fmt.Sprintf(`alter cluster decommission '%s-%s'`, crateNodePrefix, podNumber), + makeDecommissionStmt(actualCrateNodeName), } for _, stmt := range statements { diff --git a/utils/dc_util/decommission_test.go b/utils/dc_util/decommission_test.go new file mode 100644 index 00000000..db493278 --- /dev/null +++ b/utils/dc_util/decommission_test.go @@ -0,0 +1,179 @@ +package main + +import ( + "strings" + "testing" +) + +func TestDecommissionIntegrationCases(t *testing.T) { + cases := []struct { + name string + hostname string + crateNodePrefix string + defaultPrefix string + wantNodeName string + wantExtracted bool + wantError bool + }{ + { + name: "Custom prefix overrides extraction", + hostname: "crate-data-hot-uuid-0", + crateNodePrefix: "custom", + defaultPrefix: "data-hot", + wantNodeName: "custom-0", + wantExtracted: true, + wantError: false, + }, + { + name: "Default prefix uses extraction", + hostname: "crate-data-hot-uuid-0", + crateNodePrefix: "data-hot", + defaultPrefix: "data-hot", + wantNodeName: "data-hot-0", + wantExtracted: true, + wantError: false, + }, + { + name: "Malformed hostname (too short)", + hostname: "crate-data-0", + crateNodePrefix: "", + defaultPrefix: "data-hot", + wantNodeName: "", + wantExtracted: false, + wantError: true, + }, + { + name: "Malformed hostname (not crate)", + hostname: "notcrate-data-hot-uuid-0", + crateNodePrefix: "", + defaultPrefix: "data-hot", + wantNodeName: "", + wantExtracted: false, + wantError: true, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + parts := splitHostname(c.hostname) + var nodeName string + extracted := false + var err error + if c.crateNodePrefix != c.defaultPrefix && c.crateNodePrefix != "" { + // Custom prefix always overrides + podNumber := "" + if len(parts) > 0 { + podNumber = parts[len(parts)-1] + } + nodeName = c.crateNodePrefix + "-" + podNumber + extracted = true + } else if len(parts) > 3 && parts[0] == "crate" { + prefixParts := parts[1 : len(parts)-2] + podNumber := parts[len(parts)-1] + if len(prefixParts) > 0 && podNumber != "" { + nodeName = strings.Join(prefixParts, "-") + "-" + podNumber + extracted = true + } + } else { + err = ErrMalformedHostname + } + if c.wantError && err == nil { + t.Errorf("%s: expected error but got none", c.name) + } + if !c.wantError && err != nil { + t.Errorf("%s: unexpected error: %v", c.name, err) + } + if extracted != c.wantExtracted || nodeName != c.wantNodeName { + t.Errorf("%s: got nodeName=%q, extracted=%v; want nodeName=%q, extracted=%v", c.name, nodeName, extracted, c.wantNodeName, c.wantExtracted) + } else { + t.Logf("%s: nodeName=%q, extracted=%v", c.name, nodeName, extracted) + } + }) + } + + // The following would require real refactoring/mocking: + // - Simulate failure to get StatefulSet (should return a clear error) + // - Simulate StatefulSet with 0 replicas (should skip decommission) + // - Simulate failure in sendSQLStatement (should propagate error) + // - Ensure all SQL statements are sent in the correct order + // These would be best tested by extracting logic into a package and using interfaces/mocks. +} + +func TestNodeNameExtractionEdgeCases(t *testing.T) { + cases := []struct { + hostname string + expectedNode string + shouldExtract bool + }{ + // Only one prefix part + {"crate-data-12345-0", "data-0", true}, + // Multiple prefix parts + {"crate-foo-bar-baz-12345-0", "foo-bar-baz-0", true}, + // Missing UUID (should fail to extract) + {"crate-data-0", "", false}, + // Not starting with crate- + {"notcrate-data-hot-uuid-0", "", false}, + // Empty hostname + {"", "", false}, + } + for _, c := range cases { + parts := splitHostname(c.hostname) + var nodeName string + extracted := false + if len(parts) > 3 && parts[0] == "crate" { + // New logic: join all parts between 'crate' and the last two parts (skip unique ID), last part is pod number + prefixParts := parts[1 : len(parts)-2] + podNumber := parts[len(parts)-1] + if len(prefixParts) > 0 && podNumber != "" { + nodeName = strings.Join(prefixParts, "-") + "-" + podNumber + extracted = true + } + } + if extracted != c.shouldExtract || nodeName != c.expectedNode { + t.Errorf("hostname=%q: got nodeName=%q, extracted=%v; want nodeName=%q, extracted=%v", c.hostname, nodeName, extracted, c.expectedNode, c.shouldExtract) + } else { + t.Logf("hostname=%q: nodeName=%q, extracted=%v", c.hostname, nodeName, extracted) + } + } +} + +func TestDecommissionStatement(t *testing.T) { + tests := []struct { + hostname string + crateNodePrefix string + defaultPrefix string + expectedNodeName string + }{ + // Use prefix from hostname if flag is default + {"crate-master-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "data-hot", "data-hot", "master-0"}, + // Use prefix from hostname if flag is empty + {"crate-data-hot-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "", "data-hot", "data-hot-0"}, + // Use provided flag if not default + {"crate-master-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "custom", "data-hot", "custom-0"}, + } + + for _, tt := range tests { + hostnameParts := splitHostname(tt.hostname) + actualNodeName := tt.crateNodePrefix + t.Logf("Test case: hostname=%q, crateNodePrefix=%q, defaultPrefix=%q, expectedNodeName=%q", tt.hostname, tt.crateNodePrefix, tt.defaultPrefix, tt.expectedNodeName) + if tt.crateNodePrefix == tt.defaultPrefix || tt.crateNodePrefix == "" { + if len(hostnameParts) > 7 && hostnameParts[0] == "crate" { + // Node name is everything after 'crate-' up to the UUID (5 parts), plus pod number + // e.g. crate-data-hot-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0 => data-hot-0 + prefixParts := hostnameParts[1 : len(hostnameParts)-6] + podNumber := hostnameParts[len(hostnameParts)-1] + actualNodeName = strings.Join(prefixParts, "-") + "-" + podNumber + t.Logf("Extracted node name from hostname: %q (prefixParts=%v, podNumber=%q)", actualNodeName, prefixParts, podNumber) + } + } else { + podNumber := hostnameParts[len(hostnameParts)-1] + actualNodeName = tt.crateNodePrefix + "-" + podNumber + t.Logf("Used provided prefix: node name=%q (podNumber=%q)", actualNodeName, podNumber) + } + stmt := makeDecommissionStmt(actualNodeName) + want := "alter cluster decommission '" + tt.expectedNodeName + "'" + t.Logf("Generated statement: %q, Expected: %q", stmt, want) + if stmt != want { + t.Errorf("for hostname %q and prefix %q, got %q, want %q", tt.hostname, tt.crateNodePrefix, stmt, want) + } + } +}