Skip to content

Commit 8c2cc34

Browse files
committed
fix(ha): make more resilient with a job
1 parent cc46758 commit 8c2cc34

File tree

23 files changed

+1119
-339
lines changed

23 files changed

+1119
-339
lines changed

cmd/installer/cli/enable_ha.go

+92
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
package cli
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
8+
"github.com/replicatedhq/embedded-cluster/pkg/addons"
9+
"github.com/replicatedhq/embedded-cluster/pkg/helm"
10+
"github.com/replicatedhq/embedded-cluster/pkg/kubeutils"
11+
"github.com/replicatedhq/embedded-cluster/pkg/runtimeconfig"
12+
rcutil "github.com/replicatedhq/embedded-cluster/pkg/runtimeconfig/util"
13+
"github.com/replicatedhq/embedded-cluster/pkg/versions"
14+
"github.com/sirupsen/logrus"
15+
"github.com/spf13/cobra"
16+
)
17+
18+
// EnableHACmd is the command for enabling HA mode.
19+
func EnableHACmd(ctx context.Context, name string) *cobra.Command {
20+
cmd := &cobra.Command{
21+
Use: "enable-ha",
22+
Short: fmt.Sprintf("Enable high availability for the %s cluster", name),
23+
Hidden: true,
24+
PreRunE: func(cmd *cobra.Command, args []string) error {
25+
if os.Getuid() != 0 {
26+
return fmt.Errorf("enable-ha command must be run as root")
27+
}
28+
29+
rcutil.InitBestRuntimeConfig(cmd.Context())
30+
31+
os.Setenv("KUBECONFIG", runtimeconfig.PathToKubeConfig())
32+
os.Setenv("TMPDIR", runtimeconfig.EmbeddedClusterTmpSubDir())
33+
34+
return nil
35+
},
36+
PostRun: func(cmd *cobra.Command, args []string) {
37+
runtimeconfig.Cleanup()
38+
},
39+
RunE: func(cmd *cobra.Command, args []string) error {
40+
if err := runEnableHA(cmd.Context()); err != nil {
41+
return err
42+
}
43+
44+
return nil
45+
},
46+
}
47+
48+
return cmd
49+
}
50+
51+
func runEnableHA(ctx context.Context) error {
52+
kcli, err := kubeutils.KubeClient()
53+
if err != nil {
54+
return fmt.Errorf("unable to get kube client: %w", err)
55+
}
56+
57+
canEnableHA, reason, err := addons.CanEnableHA(ctx, kcli)
58+
if err != nil {
59+
return fmt.Errorf("unable to check if HA can be enabled: %w", err)
60+
}
61+
if !canEnableHA {
62+
logrus.Warnf("High availability cannot be enabled: %s", reason)
63+
return NewErrorNothingElseToAdd(fmt.Errorf("high availability cannot be enabled: %s", reason))
64+
}
65+
66+
kclient, err := kubeutils.GetClientset()
67+
if err != nil {
68+
return fmt.Errorf("unable to create kubernetes client: %w", err)
69+
}
70+
71+
in, err := kubeutils.GetLatestInstallation(ctx, kcli)
72+
if err != nil {
73+
return fmt.Errorf("unable to get latest installation: %w", err)
74+
}
75+
76+
airgapChartsPath := ""
77+
if in.Spec.AirGap {
78+
airgapChartsPath = runtimeconfig.EmbeddedClusterChartsSubDir()
79+
}
80+
81+
hcli, err := helm.NewClient(helm.HelmOptions{
82+
KubeConfig: runtimeconfig.PathToKubeConfig(),
83+
K0sVersion: versions.K0sVersion,
84+
AirgapPath: airgapChartsPath,
85+
})
86+
if err != nil {
87+
return fmt.Errorf("unable to create helm client: %w", err)
88+
}
89+
defer hcli.Close()
90+
91+
return addons.EnableHA(ctx, kcli, kclient, hcli, in.Spec.AirGap, in.Spec.Network.ServiceCIDR, in.Spec.Proxy, in.Spec.Config)
92+
}

cmd/installer/cli/install.go

-3
Original file line number | Diff line number | Diff line change
@@ -76,9 +76,6 @@ type InstallCmdFlags struct {
7676
}
7777

7878
// InstallCmd returns a cobra command for installing the embedded cluster.
79-
// This is the upcoming version of install without the operator and where
80-
// install does all of the work. This is a hidden command until it's tested
81-
// and ready.
8279
func InstallCmd(ctx context.Context, name string) *cobra.Command {
8380
var flags InstallCmdFlags
8481

cmd/installer/cli/join.go

+26-22
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/sirupsen/logrus"
2727
"github.com/spf13/cobra"
2828
"gopkg.in/yaml.v2"
29+
"k8s.io/client-go/kubernetes"
2930
"sigs.k8s.io/controller-runtime/pkg/client"
3031
k8syaml "sigs.k8s.io/yaml"
3132
)
@@ -40,9 +41,7 @@ type JoinCmdFlags struct {
4041
ignoreHostPreflights bool
4142
}
4243

43-
// This is the upcoming version of join without the operator and where
44-
// join does all of the work. This is a hidden command until it's tested
45-
// and ready.
44+
// JoinCmd returns a cobra command for joining a node to the cluster.
4645
func JoinCmd(ctx context.Context, name string) *cobra.Command {
4746
var flags JoinCmdFlags
4847

@@ -171,21 +170,6 @@ func runJoin(ctx context.Context, name string, flags JoinCmdFlags, jcmd *kotsadm
171170
return fmt.Errorf("unable to get kube client: %w", err)
172171
}
173172

174-
airgapChartsPath := ""
175-
if flags.isAirgap {
176-
airgapChartsPath = runtimeconfig.EmbeddedClusterChartsSubDir()
177-
}
178-
179-
hcli, err := helm.NewClient(helm.HelmOptions{
180-
KubeConfig: runtimeconfig.PathToKubeConfig(),
181-
K0sVersion: versions.K0sVersion,
182-
AirgapPath: airgapChartsPath,
183-
})
184-
if err != nil {
185-
return fmt.Errorf("unable to create helm client: %w", err)
186-
}
187-
defer hcli.Close()
188-
189173
hostname, err := os.Hostname()
190174
if err != nil {
191175
return fmt.Errorf("unable to get hostname: %w", err)
@@ -196,7 +180,27 @@ func runJoin(ctx context.Context, name string, flags JoinCmdFlags, jcmd *kotsadm
196180
}
197181

198182
if flags.enableHighAvailability {
199-
if err := maybeEnableHA(ctx, kcli, hcli, flags.isAirgap, cidrCfg.ServiceCIDR, jcmd.InstallationSpec.Proxy, jcmd.InstallationSpec.Config); err != nil {
183+
kclient, err := kubeutils.GetClientset()
184+
if err != nil {
185+
return fmt.Errorf("unable to create kubernetes client: %w", err)
186+
}
187+
188+
airgapChartsPath := ""
189+
if flags.isAirgap {
190+
airgapChartsPath = runtimeconfig.EmbeddedClusterChartsSubDir()
191+
}
192+
193+
hcli, err := helm.NewClient(helm.HelmOptions{
194+
KubeConfig: runtimeconfig.PathToKubeConfig(),
195+
K0sVersion: versions.K0sVersion,
196+
AirgapPath: airgapChartsPath,
197+
})
198+
if err != nil {
199+
return fmt.Errorf("unable to create helm client: %w", err)
200+
}
201+
defer hcli.Close()
202+
203+
if err := maybeEnableHA(ctx, kcli, kclient, hcli, flags.isAirgap, cidrCfg.ServiceCIDR, jcmd.InstallationSpec.Proxy, jcmd.InstallationSpec.Config); err != nil {
200204
return fmt.Errorf("unable to enable high availability: %w", err)
201205
}
202206
}
@@ -460,8 +464,8 @@ func waitForNode(ctx context.Context, kcli client.Client, hostname string) error
460464
return nil
461465
}
462466

463-
func maybeEnableHA(ctx context.Context, kcli client.Client, hcli helm.Client, isAirgap bool, serviceCIDR string, proxy *ecv1beta1.ProxySpec, cfgspec *ecv1beta1.ConfigSpec) error {
464-
canEnableHA, err := addons.CanEnableHA(ctx, kcli)
467+
func maybeEnableHA(ctx context.Context, kcli client.Client, kclient kubernetes.Interface, hcli helm.Client, isAirgap bool, serviceCIDR string, proxy *ecv1beta1.ProxySpec, cfgspec *ecv1beta1.ConfigSpec) error {
468+
canEnableHA, _, err := addons.CanEnableHA(ctx, kcli)
465469
if err != nil {
466470
return fmt.Errorf("unable to check if HA can be enabled: %w", err)
467471
}
@@ -476,5 +480,5 @@ func maybeEnableHA(ctx context.Context, kcli client.Client, hcli helm.Client, is
476480
return nil
477481
}
478482
logrus.Info("")
479-
return addons.EnableHA(ctx, kcli, hcli, isAirgap, serviceCIDR, proxy, cfgspec)
483+
return addons.EnableHA(ctx, kcli, kclient, hcli, isAirgap, serviceCIDR, proxy, cfgspec)
480484
}

cmd/installer/cli/root.go

+1
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ func RootCmd(ctx context.Context, name string) *cobra.Command {
8989
cmd.AddCommand(JoinCmd(ctx, name))
9090
cmd.AddCommand(ShellCmd(ctx, name))
9191
cmd.AddCommand(NodeCmd(ctx, name))
92+
cmd.AddCommand(EnableHACmd(ctx, name))
9293
cmd.AddCommand(VersionCmd(ctx, name))
9394
cmd.AddCommand(ResetCmd(ctx, name))
9495
cmd.AddCommand(MaterializeCmd(ctx, name))

go.mod

+1-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ require (
4141
github.com/vmware-tanzu/velero v1.15.2
4242
go.uber.org/multierr v1.11.0
4343
golang.org/x/crypto v0.33.0
44-
golang.org/x/sync v0.11.0
4544
golang.org/x/term v0.29.0
4645
gopkg.in/yaml.v2 v2.4.0
4746
gopkg.in/yaml.v3 v3.0.1
@@ -272,6 +271,7 @@ require (
272271
go.opentelemetry.io/otel/trace v1.34.0 // indirect
273272
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect
274273
golang.org/x/mod v0.22.0 // indirect
274+
golang.org/x/sync v0.11.0 // indirect
275275
golang.org/x/tools v0.28.0 // indirect
276276
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
277277
google.golang.org/api v0.197.0 // indirect

operator/pkg/cli/migrate.go

+46
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
package cli
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/replicatedhq/embedded-cluster/pkg/addons/registry/migrate"
7+
"github.com/replicatedhq/embedded-cluster/pkg/kubeutils"
8+
"github.com/spf13/cobra"
9+
)
10+
11+
func MigrateCmd() *cobra.Command {
12+
cmd := &cobra.Command{
13+
Use: "migrate",
14+
Short: "Run the specified migration",
15+
}
16+
17+
cmd.AddCommand(
18+
MigrateRegistryDataCmd(),
19+
)
20+
21+
return cmd
22+
}
23+
24+
func MigrateRegistryDataCmd() *cobra.Command {
25+
cmd := &cobra.Command{
26+
Use: "registry-data",
27+
Short: "Run the registry-data migration",
28+
SilenceUsage: true,
29+
RunE: func(cmd *cobra.Command, args []string) error {
30+
ctx := cmd.Context()
31+
32+
cli, err := kubeutils.KubeClient()
33+
if err != nil {
34+
return fmt.Errorf("failed to create kubernetes client: %w", err)
35+
}
36+
37+
err = migrate.RegistryData(ctx, cli)
38+
if err != nil {
39+
return fmt.Errorf("failed to migrate registry data: %w", err)
40+
}
41+
return nil
42+
},
43+
}
44+
45+
return cmd
46+
}

operator/pkg/cli/root.go

+1
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ func addSubcommands(cmd *cobra.Command) {
117117
cmd.AddCommand(
118118
UpgradeCmd(),
119119
UpgradeJobCmd(),
120+
MigrateCmd(),
120121
MigrateV2Cmd(),
121122
VersionCmd(),
122123
)

pkg/addons/embeddedclusteroperator/upgrade.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@ package embeddedclusteroperator
22

33
import (
44
"context"
5-
"log/slog"
65

76
"github.com/pkg/errors"
87
"github.com/replicatedhq/embedded-cluster/pkg/helm"
8+
"github.com/sirupsen/logrus"
99
"sigs.k8s.io/controller-runtime/pkg/client"
1010
)
1111

@@ -15,7 +15,7 @@ func (e *EmbeddedClusterOperator) Upgrade(ctx context.Context, kcli client.Clien
1515
return errors.Wrap(err, "check if release exists")
1616
}
1717
if !exists {
18-
slog.Info("Release not found, installing", "release", releaseName, "namespace", namespace)
18+
logrus.Debugf("Release not found, installing release %s in namespace %s", releaseName, namespace)
1919
if err := e.Install(ctx, kcli, hcli, overrides, nil); err != nil {
2020
return errors.Wrap(err, "install")
2121
}

0 commit comments

Comments
 (0)