Skip to content

worker profiles (#1915) #1974

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 53 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
63a6a04
worker profiles investigation (#1915)
laverya Mar 17, 2025
e4afc3e
schemas
hedge-sparrow Mar 17, 2025
4c3a43b
Merge remote-tracking branch 'origin/main' into ash/worker-profiles
hedge-sparrow Mar 17, 2025
4c7540c
schemas
hedge-sparrow Mar 17, 2025
f2f88bf
try and get first defined profile from kots join command response
hedge-sparrow Mar 19, 2025
f9fb7ac
Merge branch 'main' into ash/worker-profiles
hedge-sparrow Mar 19, 2025
0b80627
fix outdated release.GetEmbeddedClusterConfig calls
hedge-sparrow Mar 19, 2025
6035ebb
remove unneeded check for profile name
hedge-sparrow Mar 19, 2025
014ea09
change workerprofiles position in test config
hedge-sparrow Mar 19, 2025
1041615
reverse selection of join profile
hedge-sparrow Mar 19, 2025
28e35a2
kots image override
hedge-sparrow Mar 19, 2025
daadf03
test print spec
hedge-sparrow Mar 20, 2025
7d23c91
test print spec
hedge-sparrow Mar 20, 2025
b24198d
move worker profiles back to k0s override config
hedge-sparrow Mar 20, 2025
b7768be
no custom kots
hedge-sparrow Mar 20, 2025
4af0dfe
move test config worker profiles
hedge-sparrow Mar 20, 2025
eb68fee
don't care about profiles key here
hedge-sparrow Mar 20, 2025
8c2c25c
oops
hedge-sparrow Mar 20, 2025
8ae806f
more safety
hedge-sparrow Mar 20, 2025
b35da65
print debug
hedge-sparrow Mar 20, 2025
e1f7ce1
extract k0s config from installation object properly
hedge-sparrow Mar 20, 2025
6c79ac9
uh
hedge-sparrow Mar 20, 2025
33512a0
generate
hedge-sparrow Mar 20, 2025
8bbb894
manifests
hedge-sparrow Mar 20, 2025
3ddbe2b
load profiles from config on disk
hedge-sparrow Mar 20, 2025
f53b899
manifests
hedge-sparrow Mar 20, 2025
76f9952
remove unused err check
hedge-sparrow Mar 20, 2025
87a7c83
vibe coding
hedge-sparrow Mar 20, 2025
8cb65b4
also include worker profiles when patching config
hedge-sparrow Mar 21, 2025
3a1fac1
debug loggin
hedge-sparrow Mar 21, 2025
d290c1e
debug loggin
hedge-sparrow Mar 21, 2025
490a601
more debug logging
hedge-sparrow Mar 21, 2025
6209ad6
more debug logging
hedge-sparrow Mar 21, 2025
024304e
try Dig over DigMapping
hedge-sparrow Mar 21, 2025
11a3b2c
log patched config
hedge-sparrow Mar 21, 2025
980ac37
make sure config is synced to disk before reading back to get profiles
hedge-sparrow Mar 21, 2025
89f65c8
verbose logging of config loading?
hedge-sparrow Mar 21, 2025
71fee40
try k0s own config reader
hedge-sparrow Mar 21, 2025
a2662dc
proper debug logging
hedge-sparrow Mar 21, 2025
4c39791
debug not error
hedge-sparrow Mar 21, 2025
3b85722
Merge remote-tracking branch 'origin/main' into ash/worker-profiles
hedge-sparrow Mar 21, 2025
a98d1ee
PR comments
hedge-sparrow Mar 24, 2025
0ff20de
Remove unused config from other function sigs
hedge-sparrow Mar 24, 2025
05cf032
add worker profiles to e2e apps
hedge-sparrow Mar 24, 2025
1a4eadc
add check for worker profiles
hedge-sparrow Mar 24, 2025
4dd7f33
Merge remote-tracking branch 'origin/main' into ash/worker-profiles
laverya Mar 25, 2025
b8de1bf
rework worker profile checking
laverya Mar 25, 2025
83cd7eb
previous-stable is not the current version
laverya Mar 25, 2025
a19a6d4
fix check for profile on worker nodes
laverya Mar 25, 2025
25dcebd
remove worker profile check from check-installation-state due to old-…
laverya Mar 25, 2025
51b3ee7
pass errors when relevant
laverya Mar 25, 2025
0233f9a
fix dryrun join test, add more nil checking
laverya Mar 25, 2025
dca3b06
Merge remote-tracking branch 'origin/main' into ash/worker-profiles
laverya Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions cmd/installer/cli/join.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"
"syscall"

k0sconfig "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
ecv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
"github.com/replicatedhq/embedded-cluster/pkg/addons"
"github.com/replicatedhq/embedded-cluster/pkg/airgap"
Expand Down Expand Up @@ -340,8 +341,13 @@ func installAndJoinCluster(ctx context.Context, jcmd *kotsadm.JoinCommandRespons
return fmt.Errorf("unable to apply configuration overrides: %w", err)
}

profile, err := getFirstDefinedProfile()
if err != nil {
return fmt.Errorf("unable to get first defined profile: %w", err)
}

logrus.Debugf("joining node to cluster")
if err := runK0sInstallCommand(flags.networkInterface, jcmd.K0sJoinCommand); err != nil {
if err := runK0sInstallCommand(flags.networkInterface, jcmd.K0sJoinCommand, profile); err != nil {
return fmt.Errorf("unable to join node to cluster: %w", err)
}

Expand Down Expand Up @@ -459,9 +465,24 @@ func applyJoinConfigurationOverrides(jcmd *kotsadm.JoinCommandResponse) error {
return nil
}

// getFirstDefinedProfile opens the k0s config file located at
// runtimeconfig.PathToK0sConfig(), parses it with the k0s config reader and
// returns the name of the first entry under spec.workerProfiles.
//
// It returns an empty name with a nil error when the config declares no
// worker profiles, and a wrapped error when the file cannot be opened or
// parsed.
func getFirstDefinedProfile() (string, error) {
	f, err := os.Open(runtimeconfig.PathToK0sConfig())
	if err != nil {
		return "", fmt.Errorf("unable to open k0s config: %w", err)
	}
	defer f.Close()

	parsed, err := k0sconfig.ConfigFromReader(f)
	if err != nil {
		return "", fmt.Errorf("unable to parse k0s config: %w", err)
	}

	// No profiles configured: the caller treats "" as "do not pass --profile".
	profiles := parsed.Spec.WorkerProfiles
	if len(profiles) == 0 {
		return "", nil
	}
	return profiles[0].Name, nil
}

// runK0sInstallCommand runs the k0s install command as provided by the kots
// adm api.
func runK0sInstallCommand(networkInterface string, fullcmd string) error {
func runK0sInstallCommand(networkInterface string, fullcmd string, profile string) error {
args := strings.Split(fullcmd, " ")
args = append(args, "--token-file", "/etc/k0s/join-token")

Expand All @@ -470,6 +491,10 @@ func runK0sInstallCommand(networkInterface string, fullcmd string) error {
return fmt.Errorf("unable to find first valid address: %w", err)
}

if profile != "" {
args = append(args, "--profile", profile)
}

args = append(args, config.AdditionalInstallFlags(nodeIP)...)

if strings.Contains(fullcmd, "controller") {
Expand Down
41 changes: 40 additions & 1 deletion e2e/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,11 @@ func TestMultiNodeInstallation(t *testing.T) {
if stdout, stderr, err := tc.RunCommandOnNode(0, []string{"single-node-install.sh", "ui", os.Getenv("SHORT_SHA")}); err != nil {
t.Fatalf("fail to install embedded-cluster on node 0: %v: %s: %s", err, stdout, stderr)
}
t.Logf("checking worker profile on controller node %d", 0)
line := []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(0, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 0, err, stdout, stderr)
}

if stdout, stderr, err := tc.SetupPlaywrightAndRunTest("deploy-app"); err != nil {
t.Fatalf("fail to run playwright test deploy-app: %v: %s: %s", err, stdout, stderr)
Expand Down Expand Up @@ -422,6 +427,13 @@ func TestMultiNodeInstallation(t *testing.T) {
if stdout, stderr, err := tc.RunCommandOnNode(node, strings.Split(cmd, " ")); err != nil {
t.Fatalf("fail to join node %d as a controller: %v: %s: %s", node, err, stdout, stderr)
}

t.Logf("checking worker profile on controller node %d", node)
line := []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(node, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", node, err, stdout, stderr)
}

// XXX If we are too aggressive joining nodes we can see the following error being
// thrown by kotsadm on its log (and we get a 500 back):
// "
Expand All @@ -436,6 +448,12 @@ func TestMultiNodeInstallation(t *testing.T) {
t.Fatalf("fail to join node 3 to the cluster as a worker: %v: %s: %s", err, stdout, stderr)
}

t.Logf("checking worker profile on worker node %d", 3)
line = []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(3, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 3, err, stdout, stderr)
}

// wait for the nodes to report as ready.
t.Logf("%s: all nodes joined, waiting for them to be ready", time.Now().Format(time.RFC3339))
stdout, stderr, err = tc.RunCommandOnNode(0, []string{"wait-for-ready-nodes.sh", "4"})
Expand All @@ -444,7 +462,7 @@ func TestMultiNodeInstallation(t *testing.T) {
}

t.Logf("%s: checking installation state", time.Now().Format(time.RFC3339))
line := []string{"check-installation-state.sh", os.Getenv("SHORT_SHA"), k8sVersion()}
line = []string{"check-installation-state.sh", os.Getenv("SHORT_SHA"), k8sVersion()}
if stdout, stderr, err := tc.RunCommandOnNode(0, line); err != nil {
t.Fatalf("fail to check installation state: %v: %s: %s", err, stdout, stderr)
}
Expand Down Expand Up @@ -2026,6 +2044,12 @@ func TestMultiNodeAirgapHAInstallation(t *testing.T) {
if _, _, err := tc.RunCommandOnNode(0, line); err != nil {
t.Fatalf("fail to install embedded-cluster on node %s: %v", tc.Nodes[0], err)
}
t.Logf("checking worker profile on controller node %d", 0)
line = []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(0, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 0, err, stdout, stderr)
}

// remove artifacts after installation to save space
line = []string{"rm", "/assets/release.airgap"}
if _, _, err := tc.RunCommandOnNode(0, line); err != nil {
Expand Down Expand Up @@ -2062,6 +2086,11 @@ func TestMultiNodeAirgapHAInstallation(t *testing.T) {
if stdout, stderr, err := tc.RunCommandOnNode(1, strings.Split(command, " ")); err != nil {
t.Fatalf("fail to join node 1 to the cluster as a worker: %v: %s: %s", err, stdout, stderr)
}
t.Logf("checking worker profile on worker node %d", 1)
line = []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 1, err, stdout, stderr)
}
// remove the airgap bundle and binary after joining
line = []string{"rm", "/assets/release.airgap"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
Expand Down Expand Up @@ -2091,6 +2120,11 @@ func TestMultiNodeAirgapHAInstallation(t *testing.T) {
if _, _, err := tc.RunCommandOnNode(2, strings.Split(command, " ")); err != nil {
t.Fatalf("fail to join node 2 as a controller: %v", err)
}
t.Logf("checking worker profile on controller node %d", 2)
line = []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(2, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 2, err, stdout, stderr)
}
// remove the airgap bundle and binary after joining
line = []string{"rm", "/assets/release.airgap"}
if _, _, err := tc.RunCommandOnNode(2, line); err != nil {
Expand Down Expand Up @@ -2121,6 +2155,11 @@ func TestMultiNodeAirgapHAInstallation(t *testing.T) {
if _, _, err := tc.RunCommandOnNode(3, line); err != nil {
t.Fatalf("fail to join node 3 as a controller in ha mode: %v", err)
}
t.Logf("checking worker profile on controller node %d", 3)
line = []string{"check-worker-profile.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(3, line); err != nil {
t.Fatalf("fail to check worker profile on node %d: %v: %s: %s", 3, err, stdout, stderr)
}
// remove the airgap bundle and binary after joining
line = []string{"rm", "/assets/release.airgap"}
if _, _, err := tc.RunCommandOnNode(3, line); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
repositories:
Expand Down
5 changes: 5 additions & 0 deletions e2e/kots-release-install-legacydr/cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
repositories:
Expand Down
5 changes: 5 additions & 0 deletions e2e/kots-release-install-stable/cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
repositories:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
repositories:
Expand Down
5 changes: 5 additions & 0 deletions e2e/kots-release-install/cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
charts:
Expand Down
5 changes: 5 additions & 0 deletions e2e/kots-release-upgrade/cluster-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ spec:
spec:
telemetry:
enabled: false
workerProfiles:
- name: ip-forward
values:
allowedUnsafeSysctls:
- net.ipv4.ip_forward
extensions:
helm:
charts:
Expand Down
2 changes: 1 addition & 1 deletion e2e/scripts/check-installation-state.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ main() {
fi

# if this is the current version in CI
if echo "$version" | grep -qvE "(pre-minio-removal|1.8.0-k8s)" ; then
if echo "$version" | grep -qvE "(pre-minio-removal|1.8.0-k8s|previous-stable)" ; then
validate_data_dirs
fi

Expand Down
11 changes: 11 additions & 0 deletions e2e/scripts/check-worker-profile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Checks that the k0s systemd unit on this node was installed with the
# expected worker profile. The actual check is validate_worker_profile, which
# is expected to be provided by the sourced common.sh.
set -euox pipefail

DIR=/usr/local/bin
# Pull in the shared e2e helper functions.
source "${DIR}/common.sh"

# main runs the worker profile validation; any failure exits non-zero.
main() {
    validate_worker_profile
}

main "$@"
18 changes: 18 additions & 0 deletions e2e/scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -462,3 +462,21 @@ validate_no_pods_in_crashloop() {
exit 1
fi
}

# validate_worker_profile verifies that the k0s systemd unit on this node was
# installed with the 'ip-forward' worker profile.
#
# The controller unit is inspected when it exists; otherwise the worker unit
# is inspected. The script exits with status 1 when neither unit file is
# present or when the selected unit does not contain "--profile=ip-forward".
validate_worker_profile() {
    local unit=""
    local candidate

    # Pick the first unit file that exists, controller first.
    for candidate in k0scontroller.service k0sworker.service; do
        if [ -f "/etc/systemd/system/$candidate" ]; then
            unit="$candidate"
            break
        fi
    done

    if [ -z "$unit" ]; then
        echo "expected k0scontroller.service or k0sworker.service not found"
        exit 1
    fi

    # "--" stops grep from parsing the pattern itself as an option.
    if ! grep -- "--profile=ip-forward" "/etc/systemd/system/$unit" >/dev/null; then
        echo "expected worker profile 'ip-forward' not found in $unit"
        exit 1
    fi
}
37 changes: 35 additions & 2 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package config

import (
"fmt"
"os"
"strings"

jsonpatch "github.com/evanphx/json-patch"
Expand Down Expand Up @@ -95,7 +96,7 @@ func PatchK0sConfig(config *k0sconfig.ClusterConfig, patch string) (*k0sconfig.C
}

// InstallFlags returns a list of default flags to be used when bootstrapping a k0s cluster.
func InstallFlags(nodeIP string) []string {
func InstallFlags(nodeIP string) ([]string, error) {
flags := []string{
"install",
"controller",
Expand All @@ -104,9 +105,14 @@ func InstallFlags(nodeIP string) []string {
"--no-taints",
"-c", runtimeconfig.PathToK0sConfig(),
}
profile, err := ProfileInstallFlag()
if err != nil {
return nil, fmt.Errorf("unable to get profile install flag: %w", err)
}
flags = append(flags, profile)
flags = append(flags, AdditionalInstallFlags(nodeIP)...)
flags = append(flags, AdditionalInstallFlagsController()...)
return flags
return flags, nil
}

func AdditionalInstallFlags(nodeIP string) []string {
Expand All @@ -125,6 +131,14 @@ func AdditionalInstallFlagsController() []string {
}
}

// ProfileInstallFlag returns the "--profile=<name>" flag for the k0s install
// command, where <name> is the worker profile reported by
// controllerWorkerProfile. The lookup error, if any, is wrapped and returned
// with an empty flag.
//
// NOTE(review): when no worker profile is defined the looked-up name is empty
// and this returns the bare flag "--profile=". Confirm that k0s treats an
// empty profile value the same as the flag being absent.
func ProfileInstallFlag() (string, error) {
	name, err := controllerWorkerProfile()
	if err != nil {
		return "", fmt.Errorf("unable to get controller worker profile: %w", err)
	}
	flag := fmt.Sprintf("--profile=%s", name)
	return flag, nil
}

// nodeLabels return a slice of string with labels (key=value format) for the node where we
// are installing the k0s.
func nodeLabels() []string {
Expand Down Expand Up @@ -163,6 +177,25 @@ func additionalControllerLabels() map[string]string {
return map[string]string{}
}

// controllerWorkerProfile reads the k0s config file at
// runtimeconfig.PathToK0sConfig(), decodes it as a k0s ClusterConfig and
// returns the name of the first worker profile it lists.
//
// An empty name with a nil error means the config defines no worker
// profiles. Read and decode failures are returned wrapped.
func controllerWorkerProfile() (string, error) {
	raw, err := os.ReadFile(runtimeconfig.PathToK0sConfig())
	if err != nil {
		return "", fmt.Errorf("unable to read k0s config: %w", err)
	}

	clusterCfg := k0sconfig.ClusterConfig{}
	if err := k8syaml.Unmarshal(raw, &clusterCfg); err != nil {
		return "", fmt.Errorf("unable to unmarshal k0s config: %w", err)
	}

	// Only the first profile is used; any further profiles are ignored here.
	profiles := clusterCfg.Spec.WorkerProfiles
	if len(profiles) == 0 {
		return "", nil
	}
	return profiles[0].Name, nil
}

func AdditionalCharts() []embeddedclusterv1beta1.Chart {
clusterConfig := release.GetEmbeddedClusterConfig()
if clusterConfig != nil {
Expand Down
24 changes: 24 additions & 0 deletions pkg/helpers/k0s.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,30 @@ func K0sClusterConfigTo129Compat(clusterConfig *k0sv1beta1.ClusterConfig) (*unst
return nil, fmt.Errorf("convert to unstructured: %w", err)
}
unst := obj.UnstructuredContent()

// check the entire spec path before attempting to access "charts"
if unst["spec"] == nil {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{}); !ok {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{})["extensions"]; !ok {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{})["extensions"].(map[string]interface{}); !ok {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{})["extensions"].(map[string]interface{})["helm"]; !ok {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{})["extensions"].(map[string]interface{})["helm"].(map[string]interface{}); !ok {
return obj, nil
}
if _, ok := unst["spec"].(map[string]interface{})["extensions"].(map[string]interface{})["helm"].(map[string]interface{})["charts"]; !ok {
return obj, nil
}

charts, ok := unst["spec"].(map[string]interface{})["extensions"].(map[string]interface{})["helm"].(map[string]interface{})["charts"].([]interface{})
if !ok {
return obj, nil
Expand Down
Loading
Loading