diff --git a/sn-manager/README.md b/sn-manager/README.md index bce03956..c9613183 100644 --- a/sn-manager/README.md +++ b/sn-manager/README.md @@ -5,19 +5,29 @@ SuperNode Process Manager with Automatic Updates ## Installation Download and install sn-manager: +Note: Supported on Linux x86_64 (amd64). Other architectures are not yet supported. ```bash + # Download and extract -curl -L https://github.com/LumeraProtocol/supernode/v2/releases/latest/download/supernode-linux-amd64.tar.gz | tar -xz +# Always fetch the latest stable release asset +curl -L https://github.com/LumeraProtocol/supernode/releases/latest/download/supernode-linux-amd64.tar.gz | tar -xz -# Install sn-manager only (supernode will be managed automatically) -chmod +x sn-manager -sudo mv sn-manager /usr/local/bin/ +# Install sn-manager to a user-writable location (enables self-update) +install -D -m 0755 sn-manager "$HOME/.sn-manager/bin/sn-manager" # Verify -sn-manager version +"$HOME/.sn-manager/bin/sn-manager" version + +# Optional: add to PATH for convenience +echo 'export PATH="$HOME/.sn-manager/bin:$PATH"' >> ~/.bashrc +source ~/.bashrc && hash -r + +# Confirm the path used resolves to our install first +command -v -a sn-manager +readlink -f "$(command -v sn-manager)" ``` -Note: SuperNode binary will be automatically downloaded and managed by sn-manager during initialization. +Note: SuperNode binary will be automatically downloaded and managed by sn-manager during initialization. Installing sn-manager under your home directory allows it to auto-update itself. ## Systemd Service Setup @@ -31,7 +41,7 @@ After=network-online.target [Service] User= -ExecStart=/usr/local/bin/sn-manager start +ExecStart=/home//.sn-manager/bin/sn-manager start Restart=on-failure RestartSec=10 LimitNOFILE=65536 @@ -47,6 +57,35 @@ sudo systemctl enable --now sn-manager journalctl -u sn-manager -f ``` +Stop/start later or restart after changes: +```bash +sudo systemctl stop sn-manager +sudo systemctl start sn-manager + +# or +sudo systemctl restart sn-manager +journalctl -u sn-manager -f +``` + +### Ensure PATH points to user install (Required for self-update) + +To ensure self-update works and avoid conflicts, make sure your shell resolves to the user-writable install: + +```bash +# List all sn-manager binaries found on PATH (our path should be first) +command -v -a sn-manager + +# Remove any global copy (e.g., /usr/local/bin/sn-manager) +sudo rm -f /usr/local/bin/sn-manager || true + +# Clear shell command cache and verify again +hash -r +command -v -a sn-manager +readlink -f "$(command -v sn-manager)" +``` + +The systemd unit uses an absolute `ExecStart` pointing to your home directory, so the service will always run the intended binary regardless of PATH. + ## Initialization ### Interactive Mode @@ -54,7 +93,7 @@ journalctl -u sn-manager -f sn-manager init ``` -> !!! If Supernode was already initialized before, use `sn-manager init` without parameters OR with SN-Manager only flags (see bellow)!!! +> !!! If SuperNode was already initialized before, use `sn-manager init` without parameters OR with SN-Manager-only flags (see below)!!! ### Non-Interactive Mode @@ -92,11 +131,18 @@ sn-manager init -y \ --chain-id lumera-testnet-2 ``` +Disable auto-upgrade non-interactively: +```bash +sn-manager init -y --auto-upgrade false +``` + ### Flags +Note: Unrecognized flags to `sn-manager init` are passed through to the underlying `supernode init`. + **SN-Manager flags:** - `--force` - Override existing configuration -- `--auto-upgrade` - Enable automatic updates +- `--auto-upgrade` or `--auto-upgrade true|false` - Enable/disable automatic updates (default: enabled) - `--check-interval` - Update check interval in seconds **SuperNode flags (passed through):** @@ -116,15 +162,95 @@ sn-manager init -y \ ## Commands - `init` - Initialize sn-manager and SuperNode -- `start` - Start SuperNode -- `stop` - Stop SuperNode +- `start` - Start sn-manager and SuperNode +- `stop` - Stop sn-manager and SuperNode - `status` - Show status - `version` - Show version - `get ` - Download version - `use ` - Switch version - `ls` - List installed versions -- `ls-remote` - List available versions +- `ls-remote` - List available stable versions - `check` - Check for updates +- `supernode start` - Start SuperNode (requires sn-manager running) +- `supernode stop` - Stop SuperNode and prevent auto-restart +- `supernode status` - Show SuperNode status + +## Version Update Scenarios + +The auto-updater follows stable-only, same-major update rules and defers updates while the gateway is busy. Summary: + +| Current | Available | Auto-Upgrade Enabled | Auto Updates? | Manual Option | +|---|---|---|---|---| +| v1.7.1 | v1.7.4 (stable) | Yes | ✅ | — | +| v1.7.1-beta | v1.7.1 (stable) | Yes | ✅ | — | +| v1.7.4 | v1.8.0 (stable) | Yes | ❌ | `sn-manager get v1.8.0 && sn-manager use v1.8.0` | +| v1.7.4 | v1.8.0-rc1 (pre-release) | Yes | ❌ | `sn-manager get v1.8.0-rc1 && sn-manager use v1.8.0-rc1` | +| v1.7.4 | v1.7.4 (stable) | Yes | ❌ | — | +| v1.7.5 | v1.7.4 (stable) | Yes | ❌ | — | +| Any | Any | No | ❌ | `sn-manager get [version] && sn-manager use [version]` | +| Any | Any | Yes, but gateway busy | ❌ (deferred) | Manual allowed | +| Manager v1.7.0 | Release v1.7.4 (contains sn-manager) | Yes | ✅ (self-update) | Reinstall if needed | +| Manager v1.7.x | Release v1.8.0 | Yes | ❌ (self-update) | Reinstall new sn-manager | + +Mechanics and notes: +- Stable-only: auto-updater targets latest stable GitHub release (non-draft, non-prerelease). +- Same-major only: SuperNode and sn-manager auto-update only when the latest is the same major version as the current. +- Gateway-aware: updates are applied only when the gateway reports no running tasks; otherwise they are deferred. +- Gateway errors: repeated check failures over a 5-minute window request a clean SuperNode restart (no version change) to recover. +- Combined tarball: when updating, sn-manager downloads a single tarball once, then updates itself first (if eligible), then installs/activates the new SuperNode version. +- Config is updated to reflect the new `updates.current_version` after a successful SuperNode update. +- Manual installs: you can always override with `sn-manager get ` and `sn-manager use `; pre-releases are supported manually. + +## Start/Stop Behavior + +sn-manager start and supernode start clear the stop marker; supernode stop sets it. How the manager and SuperNode processes behave for each command, plus systemd nuances: + +| Action | Manager | SuperNode | Marker | systemd (unit uses `Restart=on-failure`) | +|---|---|---|---|---| +| `sn-manager start` | Starts manager ✅ | Starts if no stop marker ✅ | Clears `.stop_requested` if present | Start via `systemctl start sn-manager` when running under systemd | +| `sn-manager stop` | Stops manager ✅ | Stops (graceful, then forced if needed) ✅ | — | Will NOT be restarted by systemd (clean exit) ❌ | +| `sn-manager status` | Reads PID | Reports running/not and versions | — | — | +| `sn-manager supernode start` | Stays running | Starts SuperNode ✅ | Removes `.stop_requested` | — | +| `sn-manager supernode stop` | Stays running | Stops SuperNode ✅ | Writes `.stop_requested` | — | +| SuperNode crash | Stays running | Auto-restarts after backoff ✅ | Skipped if `.stop_requested` present ❌ | — | +| Manager crash | Exits abnormally | — | — | systemd restarts manager ✅ | + +Notes: +- Clean exit vs. systemd: If systemd started sn-manager and you run `sn-manager stop`, the manager exits cleanly. With `Restart=on-failure`, systemd does not restart it. Use `systemctl start sn-manager` (or `systemctl restart sn-manager`) to run it again. If you want automatic restarts after clean exits, change the unit to `Restart=always` (not generally recommended as it fights the `stop` intent). +- Stop marker: `.stop_requested` prevents automatic SuperNode restarts by the manager until cleared. `sn-manager supernode start` clears it; `sn-manager start` also clears it on launch. +- PID files: Manager writes `~/.sn-manager/sn-manager.pid`; SuperNode writes `~/.sn-manager/supernode.pid`. Stale PID files are detected and cleaned up. + +## Migration for Existing sn-manager Users + +If you already run sn-manager, you can align with this guide without re-initializing. + +1) Check your current install +- Show paths: `command -v -a sn-manager` and `readlink -f "$(command -v sn-manager)"`. +- Required for self-update: install at `~/.sn-manager/bin/sn-manager` (must be user-writable). +- If you currently use `/usr/local/bin/sn-manager`, self-update will not work reliably. Switch to the user path and remove the global copy: + `sudo rm -f /usr/local/bin/sn-manager && hash -r` + +2) Reinstall to user path (required for self-update) +```bash +curl -L https://github.com/LumeraProtocol/supernode/releases/latest/download/supernode-linux-amd64.tar.gz | tar -xz +install -D -m 0755 sn-manager "$HOME/.sn-manager/bin/sn-manager" +echo 'export PATH="$HOME/.sn-manager/bin:$PATH"' >> ~/.bashrc +source ~/.bashrc && hash -r +sn-manager version +``` + +3) Keep existing data +- No changes to `~/.supernode` or `~/.sn-manager` are required. +- Do not re-run `supernode init`; your keys and config remain intact. + +4) Update or create the systemd unit +- Use the unit from this README. Ensure `ExecStart` points to the intended binary path and set `Environment=HOME=...` and `WorkingDirectory=...` for your user. +- With `Restart=on-failure`, `sn-manager stop` will cleanly exit and systemd will not restart it; start again with `sudo systemctl start sn-manager`. + +5) Verify and adopt +- Manager status: `sn-manager status` +- Check updates: `sn-manager check` + ## Configuration @@ -137,9 +263,24 @@ updates: ``` **Reset:** + +Reset managed data while keeping the installed sn-manager binary: +```bash +sudo systemctl stop sn-manager +rm -rf ~/.supernode/ +rm -rf ~/.sn-manager/binaries ~/.sn-manager/downloads ~/.sn-manager/current ~/.sn-manager/config.yml +sn-manager init +``` + +Full reset (also removes the sn-manager binary; you will need to reinstall it): ```bash sudo systemctl stop sn-manager rm -rf ~/.sn-manager/ ~/.supernode/ +# Reinstall sn-manager as shown in Installation, then: sn-manager init ``` +## Notes + +- By default, `sn-manager start` starts both the manager and SuperNode. You can later control SuperNode independently with `sn-manager supernode start|stop|status`. +- Auto-updates use the latest stable release and apply within the same major version. A single release bundle is downloaded and used to update both sn-manager and SuperNode. diff --git a/sn-manager/cmd/check.go b/sn-manager/cmd/check.go index e3e7e530..df20b2a5 100644 --- a/sn-manager/cmd/check.go +++ b/sn-manager/cmd/check.go @@ -1,13 +1,14 @@ package cmd import ( - "fmt" + "fmt" + "strings" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/github" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/updater" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/utils" - "github.com/spf13/cobra" + "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" + "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/github" + "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/updater" + "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/utils" + "github.com/spf13/cobra" ) var checkCmd = &cobra.Command{ @@ -31,8 +32,8 @@ func runCheck(cmd *cobra.Command, args []string) error { fmt.Println("Checking for updates...") - // Create GitHub client - client := github.NewClient(config.GitHubRepo) + // Create GitHub client + client := github.NewClient(config.GitHubRepo) // Get latest stable release release, err := client.GetLatestStableRelease() @@ -40,17 +41,25 @@ func runCheck(cmd *cobra.Command, args []string) error { return fmt.Errorf("failed to check for stable updates: %w", err) } - fmt.Printf("\nLatest release: %s\n", release.TagName) - fmt.Printf("Current version: %s\n", cfg.Updates.CurrentVersion) + fmt.Printf("\nLatest release: %s\n", release.TagName) + fmt.Printf("Current version: %s\n", cfg.Updates.CurrentVersion) + // Report manager version and if it would update under the same policy + mv := strings.TrimSpace(appVersion) + if mv != "" && mv != "dev" && !strings.EqualFold(mv, "unknown") { + managerWould := utils.SameMajor(mv, release.TagName) && utils.CompareVersions(mv, release.TagName) < 0 + fmt.Printf("Manager version: %s (would update: %v)\n", mv, managerWould) + } else { + fmt.Printf("Manager version: %s\n", appVersion) + } - // Compare versions - cmp := utils.CompareVersions(cfg.Updates.CurrentVersion, release.TagName) + // Compare versions + cmp := utils.CompareVersions(cfg.Updates.CurrentVersion, release.TagName) if cmp < 0 { // Use the same logic as auto-updater to determine update eligibility - managerHome := config.GetManagerHome() - autoUpdater := updater.New(managerHome, cfg) - wouldAutoUpdate := autoUpdater.ShouldUpdate(cfg.Updates.CurrentVersion, release.TagName) + managerHome := config.GetManagerHome() + autoUpdater := updater.New(managerHome, cfg, appVersion) + wouldAutoUpdate := autoUpdater.ShouldUpdate(cfg.Updates.CurrentVersion, release.TagName) if wouldAutoUpdate { fmt.Printf("\n✓ Update available: %s → %s\n", cfg.Updates.CurrentVersion, release.TagName) diff --git a/sn-manager/cmd/get.go b/sn-manager/cmd/get.go index 54d3c214..df089484 100644 --- a/sn-manager/cmd/get.go +++ b/sn-manager/cmd/get.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "log" "os" "path/filepath" @@ -29,16 +30,13 @@ func runGet(cmd *cobra.Command, args []string) error { var targetVersion string if len(args) == 0 { - release, err := client.GetLatestRelease() + release, err := client.GetLatestStableRelease() if err != nil { return fmt.Errorf("failed to get latest release: %w", err) } targetVersion = release.TagName } else { - targetVersion = args[0] - if targetVersion[0] != 'v' { - targetVersion = "v" + targetVersion - } + targetVersion = normalizeVersionTag(args[0]) } fmt.Printf("Target version: %s\n", targetVersion) @@ -54,28 +52,20 @@ func runGet(cmd *cobra.Command, args []string) error { } tempFile := filepath.Join(managerHome, "downloads", fmt.Sprintf("supernode-%s.tmp", targetVersion)) - - var lastPercent int - progress := func(downloaded, total int64) { - if total > 0 { - percent := int(downloaded * 100 / total) - if percent != lastPercent && percent%10 == 0 { - fmt.Printf("\rProgress: %d%%", percent) - lastPercent = percent - } - } - } + progress, done := newDownloadProgressPrinter() if err := client.DownloadBinary(downloadURL, tempFile, progress); err != nil { return fmt.Errorf("download failed: %w", err) } - fmt.Println() + done() if err := versionMgr.InstallVersion(targetVersion, tempFile); err != nil { return fmt.Errorf("install failed: %w", err) } - os.Remove(tempFile) + if err := os.Remove(tempFile); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove temp file: %v", err) + } fmt.Printf("✓ Installed %s\n", targetVersion) return nil -} \ No newline at end of file +} diff --git a/sn-manager/cmd/helpers.go b/sn-manager/cmd/helpers.go index 5d1b79c1..860d16ba 100644 --- a/sn-manager/cmd/helpers.go +++ b/sn-manager/cmd/helpers.go @@ -4,28 +4,112 @@ import ( "fmt" "os" "path/filepath" + "strconv" + "strings" + "syscall" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" ) +// Common file markers and names used by the manager +const ( + managerPIDFile = "sn-manager.pid" + supernodePIDFile = "supernode.pid" + stopMarkerFile = ".stop_requested" +) + func checkInitialized() error { homeDir := config.GetManagerHome() configPath := filepath.Join(homeDir, "config.yml") - + if _, err := os.Stat(configPath); os.IsNotExist(err) { return fmt.Errorf("not initialized. Run: sn-manager init") } - + return nil } func loadConfig() (*config.Config, error) { homeDir := config.GetManagerHome() configPath := filepath.Join(homeDir, "config.yml") - + return config.Load(configPath) } func getHomeDir() string { return config.GetManagerHome() -} \ No newline at end of file +} + +// ensureSupernodeInitialized validates the user's SuperNode config exists. +func ensureSupernodeInitialized() error { + userHome, _ := os.UserHomeDir() + if userHome == "" { + userHome = os.Getenv("HOME") + } + supernodeConfigPath := filepath.Join(userHome, ".supernode", "config.yml") + if _, err := os.Stat(supernodeConfigPath); os.IsNotExist(err) { + return fmt.Errorf("SuperNode not initialized. Please run 'sn-manager init' first to configure your validator keys and network settings") + } + return nil +} + +// readPIDFromFile loads and parses a PID from a file. +func readPIDFromFile(pidPath string) (int, error) { + data, err := os.ReadFile(pidPath) + if err != nil { + return 0, err + } + pidStr := strings.TrimSpace(string(data)) + pid, err := strconv.Atoi(pidStr) + if err != nil { + return 0, fmt.Errorf("invalid PID file: %w", err) + } + return pid, nil +} + +// getProcessIfAlive returns the process if it exists and is alive. +func getProcessIfAlive(pid int) (*os.Process, bool) { + process, err := os.FindProcess(pid) + if err != nil { + return nil, false + } + if err := process.Signal(syscall.Signal(0)); err != nil { + return nil, false + } + return process, true +} + +// normalizeVersionTag ensures a version has a leading 'v'. +func normalizeVersionTag(v string) string { + if v == "" { + return v + } + if v[0] != 'v' { + return "v" + v + } + return v +} + +// newDownloadProgressPrinter returns a progress func and a done func. +// The progress func updates a single line at 10% increments; done prints a newline if needed. +func newDownloadProgressPrinter() (func(downloaded, total int64), func()) { + lastPercent := -1 + wrote := false + progress := func(downloaded, total int64) { + if total <= 0 { + return + } + percent := int(downloaded * 100 / total) + if percent != lastPercent && percent%10 == 0 { + fmt.Printf("\rProgress: %d%%", percent) + lastPercent = percent + wrote = true + } + } + done := func() { + if wrote { + fmt.Println() + } + } + return progress, done +} diff --git a/sn-manager/cmd/init.go b/sn-manager/cmd/init.go index a0353955..e568cc94 100644 --- a/sn-manager/cmd/init.go +++ b/sn-manager/cmd/init.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "log" "os" "os/exec" "path/filepath" @@ -38,33 +39,39 @@ type initFlags struct { } func parseInitFlags(args []string) *initFlags { - flags := &initFlags{ - checkInterval: 3600, - autoUpgrade: true, - } - - // Parse flags and filter out sn-manager specific ones - for i := 0; i < len(args); i++ { - switch args[i] { - case "--check-interval": - if i+1 < len(args) { - fmt.Sscanf(args[i+1], "%d", &flags.checkInterval) - i++ // Skip the value - } - case "--auto-upgrade": - flags.autoUpgrade = true - case "--force": - flags.force = true - case "-y", "--yes": - flags.nonInteractive = true - // Pass through to supernode as well - flags.supernodeArgs = append(flags.supernodeArgs, args[i]) - - default: - // Pass all other args to supernode - flags.supernodeArgs = append(flags.supernodeArgs, args[i]) - } - } + flags := &initFlags{ + checkInterval: config.DefaultUpdateCheckInterval, + autoUpgrade: true, + } + + // Parse flags and filter out sn-manager specific ones + for i := 0; i < len(args); i++ { + switch args[i] { + case "--check-interval": + if i+1 < len(args) { + fmt.Sscanf(args[i+1], "%d", &flags.checkInterval) + i++ // Skip the value + } + case "--auto-upgrade": + // Allow --auto-upgrade or --auto-upgrade=true/false + if i+1 < len(args) && (args[i+1] == "true" || args[i+1] == "false") { + flags.autoUpgrade = (args[i+1] == "true") + i++ + } else { + flags.autoUpgrade = true + } + case "--force": + flags.force = true + case "-y", "--yes": + flags.nonInteractive = true + // Pass through to supernode as well + flags.supernodeArgs = append(flags.supernodeArgs, args[i]) + + default: + // Pass all other args to supernode + flags.supernodeArgs = append(flags.supernodeArgs, args[i]) + } + } return flags } @@ -95,16 +102,16 @@ func promptForManagerConfig(flags *initFlags) error { var intervalStr string inputPrompt := &survey.Input{ Message: "Update check interval (seconds):", - Default: "3600", - Help: "How often to check for updates (3600 = 1 hour)", + Default: fmt.Sprintf("%d", config.DefaultUpdateCheckInterval), + Help: fmt.Sprintf("How often to check for updates (%d = 1 hour)", config.DefaultUpdateCheckInterval), } if err := survey.AskOne(inputPrompt, &intervalStr); err != nil { return err } interval, err := strconv.Atoi(intervalStr) if err != nil || interval < 60 { - fmt.Println("Invalid interval, using default (3600)") - flags.checkInterval = 3600 + fmt.Printf("Invalid interval, using default (%d)\n", config.DefaultUpdateCheckInterval) + flags.checkInterval = config.DefaultUpdateCheckInterval } else { flags.checkInterval = interval } @@ -199,22 +206,12 @@ func runInit(cmd *cobra.Command, args []string) error { // Download to temp file tempFile := filepath.Join(managerHome, "downloads", fmt.Sprintf("supernode-%s.tmp", targetVersion)) - // Download with progress - var lastPercent int - progress := func(downloaded, total int64) { - if total > 0 { - percent := int(downloaded * 100 / total) - if percent != lastPercent && percent%10 == 0 { - fmt.Printf("\rProgress: %d%%", percent) - lastPercent = percent - } - } - } - - if err := client.DownloadBinary(downloadURL, tempFile, progress); err != nil { - return fmt.Errorf("failed to download binary: %w", err) - } - fmt.Println() // New line after progress + // Download with progress + progress, done := newDownloadProgressPrinter() + if err := client.DownloadBinary(downloadURL, tempFile, progress); err != nil { + return fmt.Errorf("failed to download binary: %w", err) + } + done() // Install the version if err := versionMgr.InstallVersion(targetVersion, tempFile); err != nil { @@ -222,7 +219,9 @@ func runInit(cmd *cobra.Command, args []string) error { } // Clean up temp file - os.Remove(tempFile) + if err := os.Remove(tempFile); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove temp file: %v", err) + } } // Set as current version diff --git a/sn-manager/cmd/ls-remote.go b/sn-manager/cmd/ls-remote.go index 985b9d45..65619fd1 100644 --- a/sn-manager/cmd/ls-remote.go +++ b/sn-manager/cmd/ls-remote.go @@ -15,29 +15,37 @@ var lsRemoteCmd = &cobra.Command{ } func runLsRemote(cmd *cobra.Command, args []string) error { - client := github.NewClient(config.GitHubRepo) - - releases, err := client.ListReleases() - if err != nil { - return fmt.Errorf("failed to list releases: %w", err) - } - - if len(releases) == 0 { - fmt.Println("No releases found") - return nil - } - - fmt.Println("Available versions:") - for i, release := range releases { - if i == 0 { - fmt.Printf(" %s (latest) - %s\n", release.TagName, release.PublishedAt.Format("2006-01-02")) - } else { - fmt.Printf(" %s - %s\n", release.TagName, release.PublishedAt.Format("2006-01-02")) - } - if i >= 9 { - break - } - } + client := github.NewClient(config.GitHubRepo) + + releases, err := client.ListReleases() + if err != nil { + return fmt.Errorf("failed to list releases: %w", err) + } + + // Filter to stable (non-draft, non-prerelease) + var stable []*github.Release + for _, r := range releases { + if !r.Draft && !r.Prerelease { + stable = append(stable, r) + } + } + + if len(stable) == 0 { + fmt.Println("No releases found") + return nil + } + + fmt.Println("Available versions:") + for i, release := range stable { + if i == 0 { + fmt.Printf(" %s (latest) - %s\n", release.TagName, release.PublishedAt.Format("2006-01-02")) + } else { + fmt.Printf(" %s - %s\n", release.TagName, release.PublishedAt.Format("2006-01-02")) + } + if i >= 9 { + break + } + } return nil -} \ No newline at end of file +} diff --git a/sn-manager/cmd/root.go b/sn-manager/cmd/root.go index b7dbca16..1e766f2b 100644 --- a/sn-manager/cmd/root.go +++ b/sn-manager/cmd/root.go @@ -48,6 +48,8 @@ func init() { rootCmd.AddCommand(statusCmd) rootCmd.AddCommand(checkCmd) rootCmd.AddCommand(versionCmd) + // SuperNode group + rootCmd.AddCommand(supernodeCmd) } // versionCmd shows version information diff --git a/sn-manager/cmd/start.go b/sn-manager/cmd/start.go index c13a8df6..7176c6cc 100644 --- a/sn-manager/cmd/start.go +++ b/sn-manager/cmd/start.go @@ -7,8 +7,9 @@ import ( "os" "os/signal" "path/filepath" + "strconv" + "strings" "syscall" - "time" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/github" @@ -25,6 +26,7 @@ var startCmd = &cobra.Command{ The manager will: - Launch the SuperNode process +- Monitor the process and restart on crashes - Check for updates periodically (if auto-upgrade is enabled) - Perform automatic updates (if auto-upgrade is enabled)`, RunE: runStart, @@ -38,6 +40,23 @@ func runStart(cmd *cobra.Command, args []string) error { return err } + // Check if sn-manager is already running + managerPidPath := filepath.Join(home, managerPIDFile) + if pidData, err := os.ReadFile(managerPidPath); err == nil { + if pid, err := strconv.Atoi(strings.TrimSpace(string(pidData))); err == nil { + if process, err := os.FindProcess(pid); err == nil { + if err := process.Signal(syscall.Signal(0)); err == nil { + // Manager is already running + return fmt.Errorf("sn-manager is already running (PID %d)", pid) + } + } + } + // Stale PID file, remove it + if err := os.Remove(managerPidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stale manager PID file: %v", err) + } + } + // Load config cfg, err := loadConfig() if err != nil { @@ -50,9 +69,8 @@ func runStart(cmd *cobra.Command, args []string) error { } // Check if SuperNode is initialized - supernodeConfigPath := filepath.Join(os.Getenv("HOME"), ".supernode", "config.yml") - if _, err := os.Stat(supernodeConfigPath); os.IsNotExist(err) { - return fmt.Errorf("SuperNode not initialized. Please run 'sn-manager init' first to configure your validator keys and network settings") + if err := ensureSupernodeInitialized(); err != nil { + return err } // Create manager instance @@ -68,82 +86,65 @@ func runStart(cmd *cobra.Command, args []string) error { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) - // Start SuperNode - if err := mgr.Start(ctx); err != nil { - return fmt.Errorf("failed to start supernode: %w", err) + // Save sn-manager PID early to minimize race for multiple instances + managerPidPath = filepath.Join(home, managerPIDFile) + if err := os.WriteFile(managerPidPath, []byte(fmt.Sprintf("%d", os.Getpid())), 0644); err != nil { + log.Printf("Warning: failed to save sn-manager PID file: %v", err) + } + defer os.Remove(managerPidPath) + + // If there was a previous explicit stop, clear it now since user called start + stopMarkerPath := filepath.Join(home, stopMarkerFile) + if _, err := os.Stat(stopMarkerPath); err == nil { + if err := os.Remove(stopMarkerPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stop marker: %v", err) + } } // Start auto-updater if enabled var autoUpdater *updater.AutoUpdater if cfg.Updates.AutoUpgrade { - autoUpdater = updater.New(home, cfg) + autoUpdater = updater.New(home, cfg, appVersion) autoUpdater.Start(ctx) } - // Monitor SuperNode process exit - processExitCh := make(chan error, 1) + // Start monitoring in a goroutine + monitorDone := make(chan error, 1) go func() { - err := mgr.Wait() - processExitCh <- err + monitorDone <- mgr.Monitor(ctx) }() - // Main loop - monitor for updates if auto-upgrade is enabled - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() + // Wait for shutdown signal or monitor exit + select { + case <-sigChan: + fmt.Println("\nShutting down...") - for { - select { - case err := <-processExitCh: - // SuperNode process exited - if autoUpdater != nil { - autoUpdater.Stop() - } - if err != nil { - return fmt.Errorf("supernode exited with error: %w", err) - } - fmt.Println("SuperNode exited") - return nil + // Stop auto-updater if running + if autoUpdater != nil { + autoUpdater.Stop() + } - case <-sigChan: - fmt.Println("\nShutting down...") + // Cancel context to stop monitoring + cancel() - // Stop auto-updater if running - if autoUpdater != nil { - autoUpdater.Stop() - } + // Wait for monitor to finish + <-monitorDone - // Stop SuperNode + // Stop SuperNode if still running + if mgr.IsRunning() { if err := mgr.Stop(); err != nil { - return fmt.Errorf("failed to stop supernode: %w", err) + log.Printf("Failed to stop supernode: %v", err) } + } - return nil - - case <-ticker.C: - // Check if binary has been updated and restart if needed - if cfg.Updates.AutoUpgrade { - if shouldRestart(home, mgr) { - fmt.Println("Binary updated, restarting SuperNode...") - - // Stop current process - if err := mgr.Stop(); err != nil { - log.Printf("Failed to stop for restart: %v", err) - continue - } - - // Wait a moment - time.Sleep(2 * time.Second) - - // Start with new binary - if err := mgr.Start(ctx); err != nil { - log.Printf("Failed to restart with new binary: %v", err) - continue - } + return nil - fmt.Println("SuperNode restarted with new version") - } - } + case err := <-monitorDone: + // Monitor exited unexpectedly + if err != nil { + return fmt.Errorf("monitor error: %w", err) } + return nil } } @@ -202,20 +203,11 @@ func ensureBinaryExists(home string, cfg *config.Config) error { os.MkdirAll(filepath.Dir(tempFile), 0755) // Download with progress - var lastPercent int - progress := func(downloaded, total int64) { - if total > 0 { - percent := int(downloaded * 100 / total) - if percent != lastPercent && percent%10 == 0 { - fmt.Printf("\rProgress: %d%%", percent) - lastPercent = percent - } - } - } - + progress, done := newDownloadProgressPrinter() if err := client.DownloadBinary(downloadURL, tempFile, progress); err != nil { return fmt.Errorf("failed to download binary: %w", err) } + done() fmt.Println("Download complete. Installing...") @@ -225,7 +217,9 @@ func ensureBinaryExists(home string, cfg *config.Config) error { } // Clean up temp file - os.Remove(tempFile) + if err := os.Remove(tempFile); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove temp file: %v", err) + } // Set as current version if err := versionMgr.SetCurrentVersion(targetVersion); err != nil { @@ -242,15 +236,3 @@ func ensureBinaryExists(home string, cfg *config.Config) error { fmt.Printf("Successfully installed SuperNode %s\n", targetVersion) return nil } - -// shouldRestart checks if the binary has been updated -func shouldRestart(home string, mgr *manager.Manager) bool { - // Check for restart marker file - markerPath := filepath.Join(home, ".needs_restart") - if _, err := os.Stat(markerPath); err == nil { - // Remove the marker and return true - os.Remove(markerPath) - return true - } - return false -} diff --git a/sn-manager/cmd/status.go b/sn-manager/cmd/status.go index 875fabca..a36f42ec 100644 --- a/sn-manager/cmd/status.go +++ b/sn-manager/cmd/status.go @@ -2,10 +2,9 @@ package cmd import ( "fmt" + "log" "os" "path/filepath" - "strconv" - "syscall" "github.com/spf13/cobra" ) @@ -33,41 +32,28 @@ func runStatus(cmd *cobra.Command, args []string) error { } // Check PID file - pidPath := filepath.Join(home, "supernode.pid") - pidData, err := os.ReadFile(pidPath) + pidPath := filepath.Join(home, supernodePIDFile) + pid, err := readPIDFromFile(pidPath) if err != nil { fmt.Println("SuperNode Status:") fmt.Println(" Status: Not running") fmt.Printf(" Current Version: %s\n", cfg.Updates.CurrentVersion) fmt.Printf(" Manager Version: %s\n", appVersion) - return nil - } - - // Parse PID - pid, err := strconv.Atoi(string(pidData)) - if err != nil { - fmt.Println("SuperNode Status:") - fmt.Println(" Status: Invalid PID file") + fmt.Printf(" Auto-upgrade: %v\n", cfg.Updates.AutoUpgrade) return nil } // Check if process is running - process, err := os.FindProcess(pid) - if err != nil { - fmt.Println("SuperNode Status:") - fmt.Println(" Status: Not running (stale PID)") - fmt.Printf(" Current Version: %s\n", cfg.Updates.CurrentVersion) - return nil - } - - // Send signal 0 to check if process exists - err = process.Signal(syscall.Signal(0)) - if err != nil { + if _, alive := getProcessIfAlive(pid); !alive { fmt.Println("SuperNode Status:") fmt.Println(" Status: Not running (process dead)") fmt.Printf(" Current Version: %s\n", cfg.Updates.CurrentVersion) + fmt.Printf(" Manager Version: %s\n", appVersion) + fmt.Printf(" Auto-upgrade: %v\n", cfg.Updates.AutoUpgrade) // Clean up stale PID file - os.Remove(pidPath) + if err := os.Remove(pidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stale PID file: %v", err) + } return nil } diff --git a/sn-manager/cmd/stop.go b/sn-manager/cmd/stop.go index 36cbdde1..c052510b 100644 --- a/sn-manager/cmd/stop.go +++ b/sn-manager/cmd/stop.go @@ -2,9 +2,9 @@ package cmd import ( "fmt" + "log" "os" "path/filepath" - "strconv" "syscall" "time" @@ -13,61 +13,54 @@ import ( var stopCmd = &cobra.Command{ Use: "stop", - Short: "Stop the managed SuperNode", - Long: `Stop the SuperNode process gracefully.`, + Short: "Stop sn-manager and SuperNode", + Long: `Stop both the sn-manager daemon and the SuperNode process.`, RunE: runStop, } func runStop(cmd *cobra.Command, args []string) error { home := getHomeDir() - // Check PID file - pidPath := filepath.Join(home, "supernode.pid") - pidData, err := os.ReadFile(pidPath) + // Stop the manager (which will handle stopping SuperNode) + managerPidPath := filepath.Join(home, managerPIDFile) + mgrPid, err := readPIDFromFile(managerPidPath) if err != nil { if os.IsNotExist(err) { - fmt.Println("SuperNode is not running") + fmt.Println("sn-manager is not running") return nil } - return fmt.Errorf("failed to read PID file: %w", err) + return fmt.Errorf("failed to read manager PID file: %w", err) } - // Parse PID - pid, err := strconv.Atoi(string(pidData)) - if err != nil { - return fmt.Errorf("invalid PID file: %w", err) - } - - // Find process - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("failed to find process: %w", err) - } - - // Check if process is actually running - if err := process.Signal(syscall.Signal(0)); err != nil { - fmt.Println("SuperNode is not running (stale PID)") - os.Remove(pidPath) + // Find manager process and verify it's alive + mgrProcess, alive := getProcessIfAlive(mgrPid) + if !alive { + fmt.Println("sn-manager is not running (stale PID)") + if err := os.Remove(managerPidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stale manager PID file: %v", err) + } return nil } - fmt.Printf("Stopping SuperNode (PID %d)...\n", pid) + fmt.Printf("Stopping sn-manager (PID %d)...\n", mgrPid) - // Send SIGTERM for graceful shutdown - if err := process.Signal(syscall.SIGTERM); err != nil { - return fmt.Errorf("failed to send stop signal: %w", err) + // Send SIGTERM to manager for graceful shutdown + if err := mgrProcess.Signal(syscall.SIGTERM); err != nil { + return fmt.Errorf("failed to send stop signal to manager: %w", err) } - // Wait for process to exit (with timeout) - timeout := 30 * time.Second + // Wait for manager to exit (with timeout) + timeout := 10 * time.Second checkInterval := 100 * time.Millisecond elapsed := time.Duration(0) for elapsed < timeout { - if err := process.Signal(syscall.Signal(0)); err != nil { + if err := mgrProcess.Signal(syscall.Signal(0)); err != nil { // Process has exited - fmt.Println("SuperNode stopped successfully") - os.Remove(pidPath) + fmt.Println("sn-manager stopped") + if err := os.Remove(managerPidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove manager PID file: %v", err) + } return nil } time.Sleep(checkInterval) @@ -75,12 +68,14 @@ func runStop(cmd *cobra.Command, args []string) error { } // Timeout reached, force kill - fmt.Println("Graceful shutdown timeout, forcing stop...") - if err := process.Kill(); err != nil { - return fmt.Errorf("failed to force stop: %w", err) + fmt.Println("Graceful shutdown timeout, forcing manager stop...") + if err := mgrProcess.Kill(); err != nil { + return fmt.Errorf("failed to force stop manager: %w", err) } - os.Remove(pidPath) - fmt.Println("SuperNode stopped (forced)") + if err := os.Remove(managerPidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove manager PID file: %v", err) + } + fmt.Println("sn-manager stopped (forced)") return nil } diff --git a/sn-manager/cmd/supernode.go b/sn-manager/cmd/supernode.go new file mode 100644 index 00000000..cd5678be --- /dev/null +++ b/sn-manager/cmd/supernode.go @@ -0,0 +1,20 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +// supernodeCmd is a logical group for SuperNode-specific controls +var supernodeCmd = &cobra.Command{ + Use: "supernode", + Short: "Control the managed SuperNode", + Long: `Start, stop, and inspect the managed SuperNode without affecting the sn-manager service itself.`, +} + +func init() { + // Attach subcommands under supernode group + supernodeCmd.AddCommand(supernodeStartCmd) + supernodeCmd.AddCommand(supernodeStopCmd) + supernodeCmd.AddCommand(supernodeStatusCmd) +} + diff --git a/sn-manager/cmd/supernode_start.go b/sn-manager/cmd/supernode_start.go new file mode 100644 index 00000000..f3727140 --- /dev/null +++ b/sn-manager/cmd/supernode_start.go @@ -0,0 +1,71 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + "github.com/spf13/cobra" +) + +var supernodeStartCmd = &cobra.Command{ + Use: "start", + Short: "Start the managed SuperNode", + Long: `Signal the running sn-manager to start SuperNode. Requires sn-manager service to be running.`, + RunE: runSupernodeStart, +} + +func runSupernodeStart(cmd *cobra.Command, args []string) error { + // Ensure manager is initialized + if err := checkInitialized(); err != nil { + return err + } + + home := getHomeDir() + + // Check if sn-manager (service) is running via manager PID file + managerPidPath := filepath.Join(home, managerPIDFile) + data, err := os.ReadFile(managerPidPath) + if err != nil { + if os.IsNotExist(err) { + fmt.Println("sn-manager is not running. Start it with: sn-manager start") + return nil + } + return fmt.Errorf("failed to read sn-manager PID: %w", err) + } + pidStr := strings.TrimSpace(string(data)) + pid, _ := strconv.Atoi(pidStr) + proc, alive := getProcessIfAlive(pid) + if !alive { + // Stale PID file, clean up and instruct user + _ = os.Remove(managerPidPath) + fmt.Println("sn-manager is not running. Start it with: sn-manager start") + return nil + } + // Best-effort ping + _ = proc.Signal(syscall.Signal(0)) + + // Remove stop marker to allow the manager to start SuperNode + stopMarker := filepath.Join(home, stopMarkerFile) + if err := os.Remove(stopMarker); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to clear stop marker: %w", err) + } + + // If SuperNode already running, just inform + pidPath := filepath.Join(home, supernodePIDFile) + if p, err := readPIDFromFile(pidPath); err == nil { + if _, ok := getProcessIfAlive(p); ok { + fmt.Println("SuperNode is already running") + return nil + } + // stale pid file + _ = os.Remove(pidPath) + } + + fmt.Println("Request sent. The running sn-manager will start SuperNode shortly.") + return nil +} + diff --git a/sn-manager/cmd/supernode_status.go b/sn-manager/cmd/supernode_status.go new file mode 100644 index 00000000..0ca9ce52 --- /dev/null +++ b/sn-manager/cmd/supernode_status.go @@ -0,0 +1,12 @@ +package cmd + +import "github.com/spf13/cobra" + +// supernodeStatusCmd reuses the existing status logic +var supernodeStatusCmd = &cobra.Command{ + Use: "status", + Short: "Show SuperNode status", + Long: `Display the current status of the managed SuperNode process.`, + RunE: runStatus, +} + diff --git a/sn-manager/cmd/supernode_stop.go b/sn-manager/cmd/supernode_stop.go new file mode 100644 index 00000000..7823051d --- /dev/null +++ b/sn-manager/cmd/supernode_stop.go @@ -0,0 +1,88 @@ +package cmd + +import ( + "fmt" + "log" + "os" + "path/filepath" + "syscall" + "time" + + "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/manager" + "github.com/spf13/cobra" +) + +// supernodeStopCmd stops only SuperNode (not the manager) +var supernodeStopCmd = &cobra.Command{ + Use: "stop", + Short: "Stop the managed SuperNode", + Long: `Gracefully stop the managed SuperNode and prevent auto-restart until started again.`, + RunE: runSupernodeStop, +} + +func runSupernodeStop(cmd *cobra.Command, args []string) error { + home := getHomeDir() + + // Check PID file + pidPath := filepath.Join(home, supernodePIDFile) + pid, err := readPIDFromFile(pidPath) + if err != nil { + if os.IsNotExist(err) { + fmt.Println("SuperNode is not running") + return nil + } + return fmt.Errorf("failed to read PID file: %w", err) + } + + // Find process and verify it's alive + process, alive := getProcessIfAlive(pid) + if !alive { + fmt.Println("SuperNode is not running (stale PID)") + if err := os.Remove(pidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stale PID file: %v", err) + } + return nil + } + + fmt.Printf("Stopping SuperNode (PID %d)...\n", pid) + + // Write stop marker so manager does not auto-restart + stopMarker := filepath.Join(home, stopMarkerFile) + _ = os.WriteFile(stopMarker, []byte("1"), 0644) + + // Send SIGTERM for graceful shutdown + if err := process.Signal(syscall.SIGTERM); err != nil { + return fmt.Errorf("failed to send stop signal: %w", err) + } + + // Wait for process to exit (with timeout) + timeout := manager.DefaultShutdownTimeout + checkInterval := 100 * time.Millisecond + elapsed := time.Duration(0) + + for elapsed < timeout { + if err := process.Signal(syscall.Signal(0)); err != nil { + // Process has exited + fmt.Println("SuperNode stopped successfully") + if err := os.Remove(pidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove PID file: %v", err) + } + return nil + } + time.Sleep(checkInterval) + elapsed += checkInterval + } + + // Timeout reached, force kill + fmt.Println("Graceful shutdown timeout, forcing stop...") + if err := process.Kill(); err != nil { + return fmt.Errorf("failed to force stop: %w", err) + } + + if err := os.Remove(pidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove PID file: %v", err) + } + fmt.Println("SuperNode stopped (forced)") + return nil +} + diff --git a/sn-manager/cmd/use.go b/sn-manager/cmd/use.go index 9a9f0b50..02edeed8 100644 --- a/sn-manager/cmd/use.go +++ b/sn-manager/cmd/use.go @@ -21,10 +21,7 @@ func runUse(cmd *cobra.Command, args []string) error { return err } - targetVersion := args[0] - if targetVersion[0] != 'v' { - targetVersion = "v" + targetVersion - } + targetVersion := normalizeVersionTag(args[0]) managerHome := config.GetManagerHome() versionMgr := version.NewManager(managerHome) @@ -50,4 +47,4 @@ func runUse(cmd *cobra.Command, args []string) error { fmt.Printf("✓ Switched to %s\n", targetVersion) return nil -} \ No newline at end of file +} diff --git a/sn-manager/go.mod b/sn-manager/go.mod index 65961fb8..801f008c 100644 --- a/sn-manager/go.mod +++ b/sn-manager/go.mod @@ -5,15 +5,12 @@ go 1.24.1 require ( github.com/AlecAivazis/survey/v2 v2.3.7 github.com/LumeraProtocol/supernode/v2 v2.0.0-00010101000000-000000000000 - github.com/golang/mock v1.6.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.10.0 go.uber.org/mock v0.5.2 gopkg.in/yaml.v3 v3.0.1 ) require ( - github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -21,7 +18,6 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect diff --git a/sn-manager/go.sum b/sn-manager/go.sum index 35802a5a..51f96134 100644 --- a/sn-manager/go.sum +++ b/sn-manager/go.sum @@ -27,8 +27,6 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= @@ -72,7 +70,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= @@ -95,14 +92,12 @@ go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN8 go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -113,7 +108,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= @@ -123,7 +117,6 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -131,8 +124,6 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -156,10 +147,8 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= diff --git a/sn-manager/internal/config/config.go b/sn-manager/internal/config/config.go index 2452d2ce..89326d50 100644 --- a/sn-manager/internal/config/config.go +++ b/sn-manager/internal/config/config.go @@ -14,6 +14,8 @@ const ( ManagerHomeDir = ".sn-manager" // GitHubRepo is the constant GitHub repository for supernode GitHubRepo = "LumeraProtocol/supernode" + // DefaultUpdateCheckInterval is the default interval between update checks + DefaultUpdateCheckInterval = 3600 // 1 hour in seconds ) // Config represents the sn-manager configuration @@ -32,7 +34,7 @@ type UpdateConfig struct { func DefaultConfig() *Config { return &Config{ Updates: UpdateConfig{ - CheckInterval: 3600, // 1 hour + CheckInterval: DefaultUpdateCheckInterval, AutoUpgrade: true, // enabled by default for security CurrentVersion: "", // will be set when first binary is installed }, @@ -62,7 +64,7 @@ func Load(path string) (*Config, error) { // Apply defaults for missing values if cfg.Updates.CheckInterval == 0 { - cfg.Updates.CheckInterval = 3600 + cfg.Updates.CheckInterval = DefaultUpdateCheckInterval } return &cfg, nil diff --git a/sn-manager/internal/github/client.go b/sn-manager/internal/github/client.go index cd50d59b..41e58239 100644 --- a/sn-manager/internal/github/client.go +++ b/sn-manager/internal/github/client.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" "io" + "log" "net/http" "os" "path/filepath" @@ -19,6 +20,7 @@ type GithubClient interface { ListReleases() ([]*Release, error) GetRelease(tag string) (*Release, error) GetSupernodeDownloadURL(version string) (string, error) + GetReleaseTarballURL(version string) (string, error) DownloadBinary(url, destPath string, progress func(downloaded, total int64)) error } @@ -49,32 +51,48 @@ type Client struct { downloadClient *http.Client } +const ( + // APITimeout is the timeout for GitHub API calls + APITimeout = 60 * time.Second // API request limit for unauthenticated calls + // DownloadTimeout is the timeout for binary downloads + DownloadTimeout = 5 * time.Minute + // GatewayTimeout is the timeout for gateway status checks + GatewayTimeout = 5 * time.Second +) + // NewClient creates a new GitHub API client func NewClient(repo string) GithubClient { return &Client{ repo: repo, httpClient: &http.Client{ - Timeout: 30 * time.Second, // 30 second timeout for API calls + Timeout: APITimeout, }, downloadClient: &http.Client{ - Timeout: 5 * time.Minute, // 5 minute timeout for large binary downloads + Timeout: DownloadTimeout, }, } } +// newRequest sets common headers for GitHub API and asset requests +func (c *Client) newRequest(method, url string, body io.Reader) (*http.Request, error) { + req, err := http.NewRequest(method, url, body) + if err != nil { + return nil, err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + req.Header.Set("User-Agent", "sn-manager") + return req, nil +} + // GetLatestRelease fetches the latest release from GitHub func (c *Client) GetLatestRelease() (*Release, error) { url := fmt.Sprintf("https://api.github.com/repos/%s/releases/latest", c.repo) - req, err := http.NewRequest("GET", url, nil) + req, err := c.newRequest("GET", url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - // Add headers - req.Header.Set("Accept", "application/vnd.github.v3+json") - req.Header.Set("User-Agent", "sn-manager") - resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("failed to fetch release: %w", err) @@ -98,14 +116,11 @@ func (c *Client) GetLatestRelease() (*Release, error) { func (c *Client) ListReleases() ([]*Release, error) { url := fmt.Sprintf("https://api.github.com/repos/%s/releases", c.repo) - req, err := http.NewRequest("GET", url, nil) + req, err := c.newRequest("GET", url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - req.Header.Set("Accept", "application/vnd.github.v3+json") - req.Header.Set("User-Agent", "sn-manager") - resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("failed to fetch releases: %w", err) @@ -127,6 +142,13 @@ func (c *Client) ListReleases() ([]*Release, error) { // GetLatestStableRelease fetches the latest stable (non-prerelease, non-draft) release from GitHub func (c *Client) GetLatestStableRelease() (*Release, error) { + // Try the latest release endpoint first (single API call) + release, err := c.GetLatestRelease() + if err == nil && !release.Draft && !release.Prerelease { + return release, nil + } + + // Fallback to listing all releases if latest is not stable releases, err := c.ListReleases() if err != nil { return nil, fmt.Errorf("failed to list releases: %w", err) @@ -146,14 +168,11 @@ func (c *Client) GetLatestStableRelease() (*Release, error) { func (c *Client) GetRelease(tag string) (*Release, error) { url := fmt.Sprintf("https://api.github.com/repos/%s/releases/tags/%s", c.repo, tag) - req, err := http.NewRequest("GET", url, nil) + req, err := c.newRequest("GET", url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - req.Header.Set("Accept", "application/vnd.github.v3+json") - req.Header.Set("User-Agent", "sn-manager") - resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("failed to fetch release: %w", err) @@ -178,10 +197,22 @@ func (c *Client) GetSupernodeDownloadURL(version string) (string, error) { // First try the direct download URL (newer releases) directURL := fmt.Sprintf("https://github.com/%s/releases/download/%s/supernode-linux-amd64", c.repo, version) - // Check if this URL exists - resp, err := http.Head(directURL) - if err == nil && resp.StatusCode == http.StatusOK { - return directURL, nil + // Check if this URL exists using our client (with timeout) + req, err := c.newRequest("HEAD", directURL, nil) + if err == nil { + if resp, herr := c.httpClient.Do(req); herr == nil { + // Accept 2xx and 3xx as existence (GitHub may redirect) + if resp.StatusCode >= 200 && resp.StatusCode < 400 { + if err := resp.Body.Close(); err != nil { + log.Printf("Warning: failed to close response body: %v", err) + } + return directURL, nil + } + io.Copy(io.Discard, resp.Body) + if err := resp.Body.Close(); err != nil { + log.Printf("Warning: failed to close response body: %v", err) + } + } } // Fall back to checking release assets @@ -200,6 +231,41 @@ func (c *Client) GetSupernodeDownloadURL(version string) (string, error) { return "", fmt.Errorf("no Linux amd64 binary found for version %s", version) } +// GetReleaseTarballURL returns the download URL for the combined release tarball +// that includes both supernode and sn-manager binaries. +func (c *Client) GetReleaseTarballURL(version string) (string, error) { + // Try direct URL first + tarName := "supernode-linux-amd64.tar.gz" + directURL := fmt.Sprintf("https://github.com/%s/releases/download/%s/%s", c.repo, version, tarName) + + if req, err := c.newRequest("HEAD", directURL, nil); err == nil { + if resp, herr := c.httpClient.Do(req); herr == nil { + if resp.StatusCode >= 200 && resp.StatusCode < 400 { + if err := resp.Body.Close(); err != nil { + log.Printf("Warning: failed to close response body: %v", err) + } + return directURL, nil + } + io.Copy(io.Discard, resp.Body) + if err := resp.Body.Close(); err != nil { + log.Printf("Warning: failed to close response body: %v", err) + } + } + } + + // Fallback to release assets lookup + release, err := c.GetRelease(version) + if err != nil { + return "", err + } + for _, asset := range release.Assets { + if asset.Name == tarName || (strings.Contains(asset.Name, "linux") && strings.HasSuffix(asset.Name, ".tar.gz")) { + return asset.DownloadURL, nil + } + } + return "", fmt.Errorf("no suitable tarball asset found for version %s", version) +} + // DownloadBinary downloads a binary from the given URL func (c *Client) DownloadBinary(url, destPath string, progress func(downloaded, total int64)) error { // Create destination directory @@ -216,8 +282,13 @@ func (c *Client) DownloadBinary(url, destPath string, progress func(downloaded, } defer os.Remove(tmpPath) - // Download file - resp, err := c.downloadClient.Get(url) + // Download file using request with headers + req, err := c.newRequest("GET", url, nil) + if err != nil { + tmpFile.Close() + return fmt.Errorf("failed to create request: %w", err) + } + resp, err := c.downloadClient.Do(req) if err != nil { tmpFile.Close() return fmt.Errorf("failed to download: %w", err) @@ -229,27 +300,17 @@ func (c *Client) DownloadBinary(url, destPath string, progress func(downloaded, return fmt.Errorf("download failed with status %d", resp.StatusCode) } - // Copy with progress reporting - var written int64 - buf := make([]byte, 32*1024) // 32KB buffer + // Copy with progress reporting using TeeReader total := resp.ContentLength - - for { - n, err := resp.Body.Read(buf) - if n > 0 { - if _, writeErr := tmpFile.Write(buf[:n]); writeErr != nil { - tmpFile.Close() - return fmt.Errorf("failed to write file: %w", writeErr) - } - written += int64(n) - if progress != nil { - progress(written, total) - } - } - if err == io.EOF { - break + if progress != nil { + pr := &progressReporter{cb: progress, total: total} + reader := io.TeeReader(resp.Body, pr) + if _, err := io.Copy(tmpFile, reader); err != nil { + tmpFile.Close() + return fmt.Errorf("download error: %w", err) } - if err != nil { + } else { + if _, err := io.Copy(tmpFile, resp.Body); err != nil { tmpFile.Close() return fmt.Errorf("download error: %w", err) } @@ -272,3 +333,19 @@ func (c *Client) DownloadBinary(url, destPath string, progress func(downloaded, return nil } + +// progressReporter reports progress to a callback while counting bytes +type progressReporter struct { + cb func(downloaded, total int64) + total int64 + written int64 +} + +func (p *progressReporter) Write(b []byte) (int, error) { + n := len(b) + p.written += int64(n) + if p.cb != nil { + p.cb(p.written, p.total) + } + return n, nil +} diff --git a/sn-manager/internal/manager/manager.go b/sn-manager/internal/manager/manager.go index 016221c4..fd176121 100644 --- a/sn-manager/internal/manager/manager.go +++ b/sn-manager/internal/manager/manager.go @@ -108,25 +108,22 @@ func (m *Manager) Stop() error { return fmt.Errorf("failed to send SIGTERM: %w", err) } - // Wait for graceful shutdown with timeout - done := make(chan error, 1) - go func() { - _, err := m.process.Wait() - done <- err - }() - - timeout := 30 * time.Second // Default shutdown timeout - select { - case <-time.After(timeout): + // Poll for graceful shutdown with timeout without calling Wait to avoid double-wait + timeout := DefaultShutdownTimeout + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if err := m.process.Signal(syscall.Signal(0)); err != nil { + // Process no longer exists + break + } + time.Sleep(100 * time.Millisecond) + } + // If still alive, force kill + if err := m.process.Signal(syscall.Signal(0)); err == nil { log.Printf("Graceful shutdown timeout, forcing kill...") if err := m.process.Kill(); err != nil { return fmt.Errorf("failed to kill process: %w", err) } - <-done - case err := <-done: - if err != nil && err.Error() != "signal: terminated" { - log.Printf("Process exited with error: %v", err) - } } // Cleanup @@ -169,5 +166,182 @@ func (m *Manager) cleanup() { // Remove PID file pidPath := filepath.Join(m.homeDir, "supernode.pid") - os.Remove(pidPath) + if err := os.Remove(pidPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove PID file: %v", err) + } +} + +// Constants for process management +const ( + DefaultShutdownTimeout = 30 * time.Second + ProcessCheckInterval = 5 * time.Second + CrashBackoffDelay = 2 * time.Second + StopMarkerFile = ".stop_requested" + RestartMarkerFile = ".needs_restart" +) + +// Monitor continuously supervises the SuperNode process +// It ensures SuperNode is always running unless a stop marker is present +func (m *Manager) Monitor(ctx context.Context) error { + + // Create ticker for periodic checks + ticker := time.NewTicker(ProcessCheckInterval) + defer ticker.Stop() + + // Channel to monitor process exits + processExitCh := make(chan error, 1) + + // Function to arm the process wait goroutine + armProcessWait := func() { + processExitCh = make(chan error, 1) + go func() { + if err := m.Wait(); err != nil { + processExitCh <- err + } else { + processExitCh <- nil + } + }() + } + + // Initial check and start if needed + stopMarkerPath := filepath.Join(m.homeDir, StopMarkerFile) + if _, err := os.Stat(stopMarkerPath); os.IsNotExist(err) { + // No stop marker, ensure SuperNode is running + if !m.IsRunning() { + log.Println("Starting SuperNode...") + if err := m.Start(ctx); err != nil { + log.Printf("Failed to start SuperNode: %v", err) + } else { + armProcessWait() + } + } else { + // Already running, arm the wait + armProcessWait() + } + } else { + log.Println("Stop marker present, SuperNode will not be started") + } + + // Main supervision loop + for { + select { + case <-ctx.Done(): + // Context cancelled, stop monitoring + return ctx.Err() + + case err := <-processExitCh: + // SuperNode process exited + if err != nil { + log.Printf("SuperNode exited with error: %v", err) + } else { + log.Printf("SuperNode exited normally") + } + + // Cleanup internal state after exit + m.mu.Lock() + m.cleanup() + m.mu.Unlock() + + // Check if we should restart + if _, err := os.Stat(stopMarkerPath); err == nil { + log.Println("Stop marker present, not restarting SuperNode") + continue + } + + // Apply backoff to prevent rapid restart loops + time.Sleep(CrashBackoffDelay) + + // Restart SuperNode + log.Println("Restarting SuperNode after crash...") + if err := m.Start(ctx); err != nil { + log.Printf("Failed to restart SuperNode: %v", err) + continue + } + armProcessWait() + log.Println("SuperNode restarted successfully") + + case <-ticker.C: + // Periodic check for various conditions + + // 1. Check if stop marker was removed and we should start + if !m.IsRunning() { + if _, err := os.Stat(stopMarkerPath); os.IsNotExist(err) { + log.Println("Stop marker removed, starting SuperNode...") + if err := m.Start(ctx); err != nil { + log.Printf("Failed to start SuperNode: %v", err) + } else { + armProcessWait() + log.Println("SuperNode started") + } + } + } + + // 2. Check if binary was updated and needs restart + restartMarkerPath := filepath.Join(m.homeDir, RestartMarkerFile) + if _, err := os.Stat(restartMarkerPath); err == nil { + if m.IsRunning() { + log.Println("Binary update detected, restarting SuperNode...") + + // Remove the restart marker + if err := os.Remove(restartMarkerPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove restart marker: %v", err) + } + + // Create temporary stop marker for clean restart + tmpStopMarker := []byte("update") + os.WriteFile(stopMarkerPath, tmpStopMarker, 0644) + + // Stop current process + if err := m.Stop(); err != nil { + log.Printf("Failed to stop for update: %v", err) + if err := os.Remove(stopMarkerPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stop marker: %v", err) + } + continue + } + + // Brief pause + time.Sleep(CrashBackoffDelay) + + // Remove temporary stop marker + if err := os.Remove(stopMarkerPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove stop marker: %v", err) + } + + // Start with new binary + log.Println("Starting with updated binary...") + if err := m.Start(ctx); err != nil { + log.Printf("Failed to start updated binary: %v", err) + } else { + armProcessWait() + log.Println("SuperNode restarted with new binary") + } + } + } + + // 3. Health check - ensure process is actually alive + if m.IsRunning() { + // Process thinks it's running, verify it really is + m.mu.RLock() + proc := m.process + m.mu.RUnlock() + + if proc != nil { + if err := proc.Signal(syscall.Signal(0)); err != nil { + // Process is dead but not cleaned up + log.Println("Detected stale process, cleaning up...") + m.mu.Lock() + m.cleanup() + m.mu.Unlock() + } + } + } + } + } } + +// GetConfig returns the manager configuration +func (m *Manager) GetConfig() *config.Config { + return m.config +} + diff --git a/sn-manager/internal/updater/updater.go b/sn-manager/internal/updater/updater.go index a825f9e8..c8cfc96b 100644 --- a/sn-manager/internal/updater/updater.go +++ b/sn-manager/internal/updater/updater.go @@ -2,7 +2,6 @@ package updater import ( "context" - "encoding/json" "fmt" "log" "net/http" @@ -11,42 +10,47 @@ import ( "strings" "time" + "archive/tar" + "compress/gzip" + "io" + + pb "github.com/LumeraProtocol/supernode/v2/gen/supernode" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/github" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/utils" "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/version" "github.com/LumeraProtocol/supernode/v2/supernode/node/supernode/gateway" + "google.golang.org/protobuf/encoding/protojson" ) type AutoUpdater struct { - config *config.Config - homeDir string - githubClient github.GithubClient - versionMgr *version.Manager - gatewayURL string - ticker *time.Ticker - stopCh chan struct{} + config *config.Config + homeDir string + githubClient github.GithubClient + versionMgr *version.Manager + gatewayURL string + ticker *time.Ticker + stopCh chan struct{} + managerVersion string + // Gateway error backoff state + gwErrCount int + gwErrWindowStart time.Time } -type StatusResponse struct { - RunningTasks []struct { - ServiceName string `json:"service_name"` - TaskIDs []string `json:"task_ids"` - TaskCount int `json:"task_count"` - } `json:"running_tasks"` -} +// Use protobuf JSON decoding for gateway responses (int64s encoded as strings) -func New(homeDir string, cfg *config.Config) *AutoUpdater { +func New(homeDir string, cfg *config.Config, managerVersion string) *AutoUpdater { // Use the correct gateway endpoint with imported constants gatewayURL := fmt.Sprintf("http://localhost:%d/api/v1/status", gateway.DefaultGatewayPort) - + return &AutoUpdater{ - config: cfg, - homeDir: homeDir, - githubClient: github.NewClient(config.GitHubRepo), - versionMgr: version.NewManager(homeDir), - gatewayURL: gatewayURL, - stopCh: make(chan struct{}), + config: cfg, + homeDir: homeDir, + githubClient: github.NewClient(config.GitHubRepo), + versionMgr: version.NewManager(homeDir), + gatewayURL: gatewayURL, + stopCh: make(chan struct{}), + managerVersion: managerVersion, } } @@ -59,13 +63,11 @@ func (u *AutoUpdater) Start(ctx context.Context) { interval := time.Duration(u.config.Updates.CheckInterval) * time.Second u.ticker = time.NewTicker(interval) - u.checkAndUpdate(ctx) - go func() { for { select { case <-u.ticker.C: - u.checkAndUpdate(ctx) + u.checkAndUpdateCombined() case <-u.stopCh: return case <-ctx.Done(): @@ -78,51 +80,23 @@ func (u *AutoUpdater) Start(ctx context.Context) { func (u *AutoUpdater) Stop() { if u.ticker != nil { u.ticker.Stop() + u.ticker = nil } - close(u.stopCh) -} - -func (u *AutoUpdater) checkAndUpdate(ctx context.Context) { - log.Println("Checking for updates...") - - release, err := u.githubClient.GetLatestRelease() - if err != nil { - log.Printf("Failed to check for updates: %v", err) - return - } - - latestVersion := release.TagName - currentVersion := u.config.Updates.CurrentVersion - - log.Printf("Version comparison: current=%s, latest=%s", currentVersion, latestVersion) - - if !u.ShouldUpdate(currentVersion, latestVersion) { - log.Printf("Current version %s is up to date", currentVersion) - return - } - - log.Printf("Update available: %s -> %s", currentVersion, latestVersion) - - if !u.isGatewayIdle() { - log.Println("Gateway busy, skipping update") - return - } - - if err := u.performUpdate(latestVersion); err != nil { - log.Printf("Update failed: %v", err) - return + // Make Stop idempotent by only closing once + select { + case <-u.stopCh: + // already closed + default: + close(u.stopCh) } - - log.Printf("Updated to %s", latestVersion) } func (u *AutoUpdater) ShouldUpdate(current, latest string) bool { current = strings.TrimPrefix(current, "v") latest = strings.TrimPrefix(latest, "v") - // Skip pre-release versions (beta, alpha, rc, etc.) + // Skip pre-release targets (beta, alpha, rc, etc.) if strings.Contains(latest, "-") { - log.Printf("Skipping pre-release version: %s", latest) return false } @@ -130,101 +104,312 @@ func (u *AutoUpdater) ShouldUpdate(current, latest string) bool { currentBase := strings.Split(current, "-")[0] latestBase := strings.Split(latest, "-")[0] - log.Printf("Comparing base versions: current=%s, latest=%s", currentBase, latestBase) - - currentParts := strings.Split(currentBase, ".") - latestParts := strings.Split(latestBase, ".") - - if len(currentParts) < 3 || len(latestParts) < 3 { - log.Printf("Invalid version format: current=%s, latest=%s", currentBase, latestBase) - return false - } - // Only update within same major version (allow minor and patch updates) - if currentParts[0] != latestParts[0] { - log.Printf("Major version mismatch, skipping update: %s vs %s", - currentParts[0], latestParts[0]) + if !utils.SameMajor(currentBase, latestBase) { + // Quietly skip major jumps; manual upgrade path covers this return false } // Compare base versions (stable releases only) cmp := utils.CompareVersions(currentBase, latestBase) + if cmp == 0 && current != currentBase { + // Current is a prerelease for the same base; update to stable + return true + } if cmp < 0 { - log.Printf("Update needed: %s < %s", currentBase, latestBase) return true } - - log.Printf("No update needed: %s >= %s", currentBase, latestBase) return false } -func (u *AutoUpdater) isGatewayIdle() bool { - client := &http.Client{Timeout: 5 * time.Second} +// isGatewayIdle returns (idle, isError). When isError is true, +// the gateway could not be reliably checked (network/error/invalid). +// When isError is false and idle is false, the gateway is busy. +func (u *AutoUpdater) isGatewayIdle() (bool, bool) { + client := &http.Client{Timeout: github.GatewayTimeout} resp, err := client.Get(u.gatewayURL) if err != nil { log.Printf("Failed to check gateway status: %v", err) - return false + // Error contacting gateway + return false, true } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - log.Printf("Gateway returned status %d", resp.StatusCode) - return false + log.Printf("Gateway returned status %d, not safe to update", resp.StatusCode) + return false, true } - var status StatusResponse - if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + var status pb.StatusResponse + body, err := io.ReadAll(resp.Body) + if err != nil { + log.Printf("Failed to read gateway response: %v", err) + return false, true + } + if err := protojson.Unmarshal(body, &status); err != nil { log.Printf("Failed to decode gateway response: %v", err) - return false + return false, true } totalTasks := 0 for _, service := range status.RunningTasks { - totalTasks += service.TaskCount + totalTasks += int(service.TaskCount) } if totalTasks > 0 { log.Printf("Gateway busy: %d running tasks", totalTasks) - return false + return false, false } - return true + return true, false } -func (u *AutoUpdater) performUpdate(targetVersion string) error { - downloadURL, err := u.githubClient.GetSupernodeDownloadURL(targetVersion) +// checkAndUpdateCombined performs a single release check and, if needed, +// downloads the release tarball once to update sn-manager and SuperNode. +// Order: update sn-manager first (prepare new binary), then SuperNode, then +// trigger restart if manager was updated. +func (u *AutoUpdater) checkAndUpdateCombined() { + + // Fetch latest stable release once + release, err := u.githubClient.GetLatestStableRelease() if err != nil { - return fmt.Errorf("failed to get download URL: %w", err) + log.Printf("Failed to check releases: %v", err) + return } - tempFile := filepath.Join(u.homeDir, "downloads", fmt.Sprintf("supernode-%s.tmp", targetVersion)) + latest := strings.TrimSpace(release.TagName) + if latest == "" { + return + } - // Silent download - no progress reporting - if err := u.githubClient.DownloadBinary(downloadURL, tempFile, nil); err != nil { - return fmt.Errorf("failed to download binary: %w", err) + // Determine if sn-manager should update (same criteria: stable, same major) + managerNeedsUpdate := false + ver := strings.TrimSpace(u.managerVersion) + if ver != "" && ver != "dev" && !strings.EqualFold(ver, "unknown") { + if utils.SameMajor(ver, latest) && utils.CompareVersions(ver, latest) < 0 { + managerNeedsUpdate = true + } } - if err := u.versionMgr.InstallVersion(targetVersion, tempFile); err != nil { - return fmt.Errorf("failed to install version: %w", err) + // Determine if SuperNode should update using existing policy + currentSN := u.config.Updates.CurrentVersion + supernodeNeedsUpdate := u.ShouldUpdate(currentSN, latest) + + if !managerNeedsUpdate && !supernodeNeedsUpdate { + return } - os.Remove(tempFile) + // Gate all updates (manager + SuperNode) on gateway idleness + // to avoid disrupting traffic during a self-update. + if idle, isErr := u.isGatewayIdle(); !idle { + if isErr { + // Track errors and possibly request a clean SuperNode restart + u.handleGatewayError() + } else { + log.Println("Gateway busy, deferring updates") + } + return + } - if err := u.versionMgr.SetCurrentVersion(targetVersion); err != nil { - return fmt.Errorf("failed to set current version: %w", err) + // Download the combined release tarball once + tarURL, err := u.githubClient.GetReleaseTarballURL(latest) + if err != nil { + log.Printf("Failed to get tarball URL: %v", err) + return + } + tarPath := filepath.Join(u.homeDir, "downloads", fmt.Sprintf("release-%s.tar.gz", latest)) + if err := u.githubClient.DownloadBinary(tarURL, tarPath, nil); err != nil { + log.Printf("Failed to download tarball: %v", err) + return } + defer func() { + if err := os.Remove(tarPath); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove tarball: %v", err) + } + }() - u.config.Updates.CurrentVersion = targetVersion - configPath := filepath.Join(u.homeDir, "config.yml") - if err := config.Save(u.config, configPath); err != nil { - return fmt.Errorf("failed to update config: %w", err) + // Open tarball for extraction once + f, err := os.Open(tarPath) + if err != nil { + log.Printf("Failed to open tarball: %v", err) + return } + defer f.Close() - markerPath := filepath.Join(u.homeDir, ".needs_restart") - if err := os.WriteFile(markerPath, []byte(targetVersion), 0644); err != nil { - log.Printf("Failed to create restart marker: %v", err) + gz, err := gzip.NewReader(f) + if err != nil { + log.Printf("Failed to create gzip reader: %v", err) + return } + defer gz.Close() + + tr := tar.NewReader(gz) + + // Prepare paths for extraction targets + exePath, err := os.Executable() + if err != nil { + log.Printf("Cannot determine executable path: %v", err) + return + } + exePath, _ = filepath.EvalSymlinks(exePath) + tmpManager := exePath + ".new" + tmpSN := filepath.Join(u.homeDir, "downloads", fmt.Sprintf("supernode-%s.tmp", latest)) + + extractedManager := false + extractedSN := false + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + log.Printf("Tar read error: %v", err) + return + } + base := filepath.Base(hdr.Name) + + if managerNeedsUpdate && base == "sn-manager" && !extractedManager { + out, err := os.OpenFile(tmpManager, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0755) + if err != nil { + log.Printf("Failed to create temp sn-manager: %v", err) + return + } + if _, err := io.Copy(out, tr); err != nil { + out.Close() + os.Remove(tmpManager) + log.Printf("Failed to extract sn-manager: %v", err) + return + } + if err := out.Close(); err != nil { + os.Remove(tmpManager) + log.Printf("Failed to close temp sn-manager: %v", err) + return + } + extractedManager = true + continue + } - return nil + if supernodeNeedsUpdate && base == "supernode" && !extractedSN { + out, err := os.OpenFile(tmpSN, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0755) + if err != nil { + log.Printf("Failed to create temp supernode: %v", err) + return + } + if _, err := io.Copy(out, tr); err != nil { + out.Close() + os.Remove(tmpSN) + log.Printf("Failed to extract supernode: %v", err) + return + } + if err := out.Close(); err != nil { + os.Remove(tmpSN) + log.Printf("Failed to close temp supernode: %v", err) + return + } + extractedSN = true + continue + } + } + + // Apply sn-manager update first + managerUpdated := false + if managerNeedsUpdate { + if extractedManager { + if err := os.Rename(tmpManager, exePath); err != nil { + os.Remove(tmpManager) + log.Printf("Cannot replace sn-manager (%s). Update manually: %v", exePath, err) + } else { + if dirF, err := os.Open(filepath.Dir(exePath)); err == nil { + _ = dirF.Sync() + dirF.Close() + } + managerUpdated = true + log.Printf("sn-manager updated to %s", latest) + } + } else { + log.Printf("sn-manager binary not found in tarball; skipping") + } + } + + // Apply SuperNode update (idle already verified) and extracted + if supernodeNeedsUpdate { + if extractedSN { + if err := u.versionMgr.InstallVersion(latest, tmpSN); err != nil { + log.Printf("Failed to install SuperNode: %v", err) + } else { + if err := u.versionMgr.SetCurrentVersion(latest); err != nil { + log.Printf("Failed to activate SuperNode %s: %v", latest, err) + } else { + u.config.Updates.CurrentVersion = latest + if err := config.Save(u.config, filepath.Join(u.homeDir, "config.yml")); err != nil { + log.Printf("Failed to save config: %v", err) + } + if err := os.WriteFile(filepath.Join(u.homeDir, ".needs_restart"), []byte(latest), 0644); err != nil { + log.Printf("Failed to write restart marker: %v", err) + } + log.Printf("SuperNode updated to %s", latest) + } + } + if err := os.Remove(tmpSN); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove temp supernode: %v", err) + } + } else { + log.Printf("supernode binary not found in tarball; skipping") + } + } + + // If manager updated, restart service after completing all work + if managerUpdated { + log.Printf("Self-update applied, restarting service...") + go func() { + time.Sleep(500 * time.Millisecond) + os.Exit(3) + }() + } +} + +// handleGatewayError increments an error counter in a rolling 5-minute window +// and when the threshold is reached, requests a clean SuperNode restart by +// writing the standard restart marker consumed by the manager monitor. +func (u *AutoUpdater) handleGatewayError() { + const ( + window = 5 * time.Minute + retries = 3 // attempts within window before restart + ) + now := time.Now() + if u.gwErrWindowStart.IsZero() { + u.gwErrWindowStart = now + u.gwErrCount = 1 + log.Printf("Gateway check error (1/%d); starting 5m observation window", retries) + return + } + + elapsed := now.Sub(u.gwErrWindowStart) + if elapsed >= window { + // Window elapsed; decide based on accumulated errors + if u.gwErrCount >= retries { + marker := filepath.Join(u.homeDir, ".needs_restart") + if err := os.WriteFile(marker, []byte("gateway-error-recover"), 0644); err != nil { + log.Printf("Failed to write restart marker after gateway errors: %v", err) + } else { + log.Printf("Gateway errors persisted (%d/%d) over >=5m; requesting SuperNode restart to recover gateway", u.gwErrCount, retries) + } + } + // Start a new window beginning now, with this error as the first hit + u.gwErrWindowStart = now + u.gwErrCount = 1 + return + } + + // Still within the window; increment and possibly announce threshold reached + u.gwErrCount++ + if u.gwErrCount < retries { + log.Printf("Gateway check error (%d/%d) within 5m; will retry", u.gwErrCount, retries) + return + } + // Threshold reached but do not restart until full window elapses + remaining := window - elapsed + log.Printf("Gateway error threshold reached; waiting %s before requesting SuperNode restart", remaining.Truncate(time.Second)) } diff --git a/sn-manager/internal/updater/updater_test.go b/sn-manager/internal/updater/updater_test.go deleted file mode 100644 index 1725659e..00000000 --- a/sn-manager/internal/updater/updater_test.go +++ /dev/null @@ -1,1232 +0,0 @@ -package updater - -import ( - "context" - "fmt" - "io/ioutil" - "net/http" - "net/http/httptest" - "os" - "path/filepath" - "sync" - "testing" - "time" - - "go.uber.org/mock/gomock" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" - - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/config" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/github" - "github.com/LumeraProtocol/supernode/v2/sn-manager/internal/version" -) - -// setupTestEnvironment creates isolated test environment for updater tests -func setupTestEnvironment(t *testing.T) (string, func()) { - homeDir, err := ioutil.TempDir("", "updater-test-") - require.NoError(t, err) - - // Create required directories - dirs := []string{ - filepath.Join(homeDir, "binaries"), - filepath.Join(homeDir, "downloads"), - filepath.Join(homeDir, "logs"), - } - for _, dir := range dirs { - require.NoError(t, os.MkdirAll(dir, 0755)) - } - - cleanup := func() { - os.RemoveAll(homeDir) - } - - return homeDir, cleanup -} - -// createTestConfig creates a test configuration -func createTestConfig(t *testing.T, homeDir string, currentVersion string, autoUpgrade bool, checkInterval int) *config.Config { - cfg := &config.Config{ - Updates: config.UpdateConfig{ - CheckInterval: checkInterval, - AutoUpgrade: autoUpgrade, - CurrentVersion: currentVersion, - }, - } - - // Save config to file - configPath := filepath.Join(homeDir, "config.yml") - data, err := yaml.Marshal(cfg) - require.NoError(t, err) - require.NoError(t, ioutil.WriteFile(configPath, data, 0644)) - - return cfg -} - -// createMockBinary creates a mock binary file -func createMockBinary(t *testing.T, homeDir, version string) { - versionDir := filepath.Join(homeDir, "binaries", version) - require.NoError(t, os.MkdirAll(versionDir, 0755)) - - binaryPath := filepath.Join(versionDir, "supernode") - binaryContent := "#!/bin/sh\necho 'mock supernode " + version + "'\n" - require.NoError(t, ioutil.WriteFile(binaryPath, []byte(binaryContent), 0755)) -} - -// TestAutoUpdater_ShouldUpdate tests version comparison logic -func TestAutoUpdater_ShouldUpdate(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - updater := New(homeDir, cfg) - - tests := []struct { - name string - current string - latest string - expected bool - }{ - // Patch version updates (should update) - {"patch_update", "v1.0.0", "v1.0.1", true}, - {"patch_update_no_prefix", "1.0.0", "1.0.1", true}, - - // Minor version updates (should update within same major version) - {"minor_update", "v1.0.0", "v1.1.0", true}, - {"major_update", "v1.0.0", "v2.0.0", false}, - - // Same version (should not update) - {"same_version", "v1.0.0", "v1.0.0", false}, - - // Downgrade (should not update) - {"downgrade", "v1.0.1", "v1.0.0", false}, - - // Invalid versions (should not update) - {"invalid_current", "invalid", "v1.0.1", false}, - {"invalid_latest", "v1.0.0", "invalid", false}, - {"short_version", "v1.0", "v1.0.1", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := updater.ShouldUpdate(tt.current, tt.latest) - assert.Equal(t, tt.expected, result, "shouldUpdate(%s, %s) = %v, want %v", tt.current, tt.latest, result, tt.expected) - }) - } -} - -// TestAutoUpdater_IsGatewayIdle tests gateway status checking -func TestAutoUpdater_IsGatewayIdle(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - - tests := []struct { - name string - serverResponse string - statusCode int - expected bool - }{ - { - name: "gateway_idle", - serverResponse: `{ - "running_tasks": [] - }`, - statusCode: http.StatusOK, - expected: true, - }, - { - name: "gateway_busy", - serverResponse: `{ - "running_tasks": [ - { - "service_name": "test-service", - "task_ids": ["task1", "task2"], - "task_count": 2 - } - ] - }`, - statusCode: http.StatusOK, - expected: false, - }, - { - name: "gateway_error", - serverResponse: `{"error": "internal server error"}`, - statusCode: http.StatusInternalServerError, - expected: false, - }, - { - name: "invalid_json", - serverResponse: `invalid json`, - statusCode: http.StatusOK, - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create mock server - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(tt.statusCode) - w.Write([]byte(tt.serverResponse)) - })) - defer server.Close() - - // Create updater with custom gateway URL - updater := New(homeDir, cfg) - updater.gatewayURL = server.URL - - result := updater.isGatewayIdle() - assert.Equal(t, tt.expected, result) - }) - } - - t.Run("gateway_unreachable", func(t *testing.T) { - updater := New(homeDir, cfg) - updater.gatewayURL = "http://localhost:99999" // Non-existent port - - result := updater.isGatewayIdle() - assert.False(t, result) - }) -} - -// TestAutoUpdater_PerformUpdate tests the complete update process -func TestAutoUpdater_PerformUpdate(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - - // Create initial version - createMockBinary(t, homeDir, "v1.0.0") - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Create mock controller and client - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Setup expectations - targetVersion := "v1.0.1" - downloadURL := "https://example.com/supernode-v1.0.1" - - mockClient.EXPECT(). - GetSupernodeDownloadURL(targetVersion). - Return(downloadURL, nil) - - mockClient.EXPECT(). - DownloadBinary(downloadURL, gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - // Simulate download by creating a mock binary - mockBinaryContent := "#!/bin/sh\necho 'mock supernode v1.0.1'\n" - return ioutil.WriteFile(destPath, []byte(mockBinaryContent), 0755) - }) - - // Create updater and inject mock client - updater := New(homeDir, cfg) - updater.githubClient = mockClient - - // Perform update - err := updater.performUpdate(targetVersion) - require.NoError(t, err) - - // Verify update was successful - assert.Equal(t, targetVersion, updater.config.Updates.CurrentVersion) - - // Verify version was installed - assert.True(t, updater.versionMgr.IsVersionInstalled(targetVersion)) - - // Verify current version was set - currentVersion, err := updater.versionMgr.GetCurrentVersion() - require.NoError(t, err) - assert.Equal(t, targetVersion, currentVersion) - - // Verify restart marker was created - markerPath := filepath.Join(homeDir, ".needs_restart") - markerContent, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, targetVersion, string(markerContent)) - - // Verify config was updated - updatedConfig, err := config.Load(filepath.Join(homeDir, "config.yml")) - require.NoError(t, err) - assert.Equal(t, targetVersion, updatedConfig.Updates.CurrentVersion) -} - -// TestAutoUpdater_CheckAndUpdate tests the main update logic (Fixed Version) -func TestAutoUpdater_CheckAndUpdate(t *testing.T) { - tests := []struct { - name string - currentVersion string - latestVersion string - gatewayIdle bool - expectUpdate bool - expectError bool - }{ - { - name: "update_available_gateway_idle", - currentVersion: "v1.0.0", - latestVersion: "v1.0.1", - gatewayIdle: true, - expectUpdate: true, - }, - { - name: "update_available_gateway_busy", - currentVersion: "v1.0.0", - latestVersion: "v1.0.1", - gatewayIdle: false, - expectUpdate: false, - }, - { - name: "no_update_available", - currentVersion: "v1.0.1", - latestVersion: "v1.0.1", - gatewayIdle: true, - expectUpdate: false, - }, - { - name: "minor_version_update_should_proceed", - currentVersion: "v1.0.0", - latestVersion: "v1.1.0", - gatewayIdle: true, - expectUpdate: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create isolated environment for each subtest - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, tt.currentVersion, true, 3600) - - // Create initial version - createMockBinary(t, homeDir, tt.currentVersion) - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion(tt.currentVersion)) - - // Setup mock gateway server - gatewayResponse := `{"running_tasks": []}` - if !tt.gatewayIdle { - gatewayResponse = `{"running_tasks": [{"service_name": "test", "task_count": 1}]}` - } - - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - w.Write([]byte(gatewayResponse)) - })) - defer gatewayServer.Close() - - // Create mock controller and client - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Setup GitHub client expectations - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{ - TagName: tt.latestVersion, - }, nil) - - if tt.expectUpdate { - mockClient.EXPECT(). - GetSupernodeDownloadURL(tt.latestVersion). - Return("https://example.com/binary", nil) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock binary'\n" - return ioutil.WriteFile(destPath, []byte(content), 0755) - }) - } - - // Create updater and inject mocks - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // Verify initial state - no restart marker should exist - markerPath := filepath.Join(homeDir, ".needs_restart") - _, err := os.Stat(markerPath) - require.True(t, os.IsNotExist(err), "Restart marker should not exist initially") - - // Run check and update - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Verify results - if tt.expectUpdate { - assert.Equal(t, tt.latestVersion, updater.config.Updates.CurrentVersion, "Config should be updated to new version") - - // Verify restart marker exists - _, err := os.Stat(markerPath) - assert.NoError(t, err, "Restart marker should exist after successful update") - - // Verify marker content - markerContent, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, tt.latestVersion, string(markerContent), "Restart marker should contain the new version") - - // Verify new version is installed - assert.True(t, updater.versionMgr.IsVersionInstalled(tt.latestVersion), "New version should be installed") - - // Verify current version is set - currentVersion, err := updater.versionMgr.GetCurrentVersion() - require.NoError(t, err) - assert.Equal(t, tt.latestVersion, currentVersion, "Current version should be updated") - } else { - assert.Equal(t, tt.currentVersion, updater.config.Updates.CurrentVersion, "Config should remain unchanged") - - // Verify no restart marker - _, err := os.Stat(markerPath) - assert.True(t, os.IsNotExist(err), "Restart marker should not exist when no update occurred") - } - - t.Logf("✅ Test case '%s' completed successfully", tt.name) - }) - } -} - -// Additional test to verify restart marker cleanup -func TestAutoUpdater_RestartMarkerHandling(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Create existing restart marker (simulating previous update) - markerPath := filepath.Join(homeDir, ".needs_restart") - require.NoError(t, ioutil.WriteFile(markerPath, []byte("v0.9.0"), 0644)) - - // Verify marker exists initially - _, err := os.Stat(markerPath) - require.NoError(t, err, "Restart marker should exist initially") - - // Setup mocks for no update scenario - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.0"}, nil) // Same version - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // Run check and update (should not update) - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Verify existing restart marker is still there (not removed by checkAndUpdate) - _, err = os.Stat(markerPath) - assert.NoError(t, err, "Existing restart marker should not be removed by checkAndUpdate") - - // Verify content is unchanged - content, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, "v0.9.0", string(content), "Existing restart marker content should be unchanged") -} - -// Test to verify behavior when version manager operations fail -func TestAutoUpdater_VersionManagerErrors(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - - // Create initial version and set it up properly - createMockBinary(t, homeDir, "v1.0.0") - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Make the binaries directory read-only to cause installation failures - binariesDir := filepath.Join(homeDir, "binaries") - require.NoError(t, os.Chmod(binariesDir, 0444)) // Read-only - - // Restore permissions in cleanup to allow directory removal - defer func() { - os.Chmod(binariesDir, 0755) - }() - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock binary'\n" - return ioutil.WriteFile(destPath, []byte(content), 0755) - }) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // This should handle version manager errors gracefully - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged due to installation failure - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - - // No restart marker should be created due to failure - markerPath := filepath.Join(homeDir, ".needs_restart") - _, err := os.Stat(markerPath) - assert.True(t, os.IsNotExist(err), "No restart marker should exist after failed update") -} - -// Alternative test with download failure -func TestAutoUpdater_DownloadFailure(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - // Simulate download failure - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - Return(fmt.Errorf("download failed: network error")) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // This should handle download errors gracefully - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged due to download failure - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - - // No restart marker should be created due to failure - markerPath := filepath.Join(homeDir, ".needs_restart") - _, err := os.Stat(markerPath) - assert.True(t, os.IsNotExist(err), "No restart marker should exist after failed download") -} - -// Test config save failure -func TestAutoUpdater_ConfigSaveFailure(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Make the home directory read-only to cause config save failure - require.NoError(t, os.Chmod(homeDir, 0444)) - defer func() { - os.Chmod(homeDir, 0755) // Restore for cleanup - }() - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock binary'\n" - return ioutil.WriteFile(destPath, []byte(content), 0755) - }) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // This should handle config save errors - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // The update might partially succeed but config save should fail - // The exact behavior depends on implementation - let's just verify it doesn't crash - t.Log("Config save failure test completed without panic") -} - -// Simpler test that definitely causes failure -func TestAutoUpdater_InstallationFailure(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - // Download succeeds but creates a file in a location that will cause installation to fail - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - // Create an invalid binary (directory instead of file) - return os.Mkdir(destPath, 0755) - }) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // This should handle installation errors gracefully - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged due to installation failure - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - - // No restart marker should be created due to failure - markerPath := filepath.Join(homeDir, ".needs_restart") - _, err := os.Stat(markerPath) - assert.True(t, os.IsNotExist(err), "No restart marker should exist after failed installation") -} - -// Test concurrent access to updater -func TestAutoUpdater_ConcurrentAccess(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Allow multiple calls (for concurrent access) - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.0"}, nil). - AnyTimes() - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - // Run multiple concurrent checkAndUpdate calls - const numGoroutines = 5 - var wg sync.WaitGroup - errors := make(chan error, numGoroutines) - - for i := 0; i < numGoroutines; i++ { - wg.Add(1) - go func(id int) { - defer wg.Done() - defer func() { - if r := recover(); r != nil { - errors <- fmt.Errorf("goroutine %d panicked: %v", id, r) - } - }() - - ctx := context.Background() - updater.checkAndUpdate(ctx) - errors <- nil - }(i) - } - - wg.Wait() - close(errors) - - // Verify no errors occurred - for err := range errors { - assert.NoError(t, err) - } - - // Verify system is in consistent state - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) -} - -// TestAutoUpdater_StartStop tests auto-updater lifecycle -func TestAutoUpdater_StartStop(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - t.Run("auto_upgrade_disabled", func(t *testing.T) { - cfg := createTestConfig(t, homeDir, "v1.0.0", false, 1) // 1 second interval - updater := New(homeDir, cfg) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Start should return immediately if auto-upgrade is disabled - updater.Start(ctx) - - // Stop should work without issues - updater.Stop() - - // No ticker should be created - assert.Nil(t, updater.ticker) - }) - - t.Run("auto_upgrade_enabled", func(t *testing.T) { - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 1) // 1 second interval - - // Create mock controller and client - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Expect at least one call to GetLatestRelease - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{ - TagName: "v1.0.0", // Same version, no update - }, nil). - AnyTimes() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - // Start auto-updater - updater.Start(ctx) - - // Let it run for a bit - time.Sleep(2 * time.Second) - - // Stop should work - updater.Stop() - - // Ticker should have been created - assert.NotNil(t, updater.ticker) - }) -} - -// TestAutoUpdater_ErrorHandling tests error scenarios -func TestAutoUpdater_ErrorHandling(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) - - t.Run("github_api_error", func(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - mockClient.EXPECT(). - GetLatestRelease(). - Return(nil, assert.AnError) - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - - // Should not panic or crash - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - }) - - t.Run("download_url_error", func(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("", assert.AnError) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - }) - - t.Run("download_binary_error", func(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - // Simulate download failure - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - Return(assert.AnError) - - // Setup idle gateway - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - ctx := context.Background() - updater.checkAndUpdate(ctx) - - // Version should remain unchanged - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - }) -} - -// / TestAutoUpdater_Integration tests end-to-end auto-update scenarios (Fixed Version) -func TestAutoUpdater_Integration(t *testing.T) { - if testing.Short() { - t.Skip("Skipping integration test in short mode") - } - - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - // Create initial setup - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 2) // 2 second interval - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Setup mock gateway (idle) - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - // Create mock controller and client - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Set up call sequence expectations: - // 1. First call returns same version (no update) - // 2. Second call returns new version (update available) - // 3. Subsequent calls return the new version (no more updates) - - gomock.InOrder( - // First call - no update - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.0"}, nil), - - // Second call - update available - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil), - - // Third and subsequent calls - no more updates - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil). - AnyTimes(), // Allow any number of subsequent calls - ) - - // Expect download operations for the update - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock supernode v1.0.1'\n" - if progress != nil { - progress(100, 100) // Report full download - } - return ioutil.WriteFile(destPath, []byte(content), 0755) - }) - - // Create and start updater - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) - defer cancel() - - // Start the updater - updater.Start(ctx) - - // Wait for the update to happen - // We expect: t=0s (no update), t=2s (update), t=4s (no update) - time.Sleep(5 * time.Second) - - // Stop the updater - updater.Stop() - - // Verify the update occurred - assert.Equal(t, "v1.0.1", updater.config.Updates.CurrentVersion) - - // Verify new version is installed - assert.True(t, updater.versionMgr.IsVersionInstalled("v1.0.1")) - - // Verify restart marker exists - markerPath := filepath.Join(homeDir, ".needs_restart") - markerContent, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, "v1.0.1", string(markerContent)) -} - -// Alternative approach: Test with manual trigger instead of timer -func TestAutoUpdater_ManualUpdateFlow(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - // Create initial setup - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 3600) // Long interval to avoid timer - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Setup mock gateway (idle) - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Test scenario 1: No update available - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.0"}, nil) - - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - ctx := context.Background() - - // First check - no update - updater.checkAndUpdate(ctx) - assert.Equal(t, "v1.0.0", updater.config.Updates.CurrentVersion) - - // Test scenario 2: Update available - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.1"}, nil) - - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock supernode v1.0.1'\n" - if progress != nil { - progress(50, 100) // Partial progress - progress(100, 100) // Complete - } - return ioutil.WriteFile(destPath, []byte(content), 0755) - }) - - // Second check - update available - updater.checkAndUpdate(ctx) - - // Verify the update occurred - assert.Equal(t, "v1.0.1", updater.config.Updates.CurrentVersion) - assert.True(t, updater.versionMgr.IsVersionInstalled("v1.0.1")) - - // Verify restart marker - markerPath := filepath.Join(homeDir, ".needs_restart") - markerContent, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, "v1.0.1", string(markerContent)) - - // Test scenario 3: Gateway busy, should skip update - // Create busy gateway server - busyGatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": [{"service_name": "test", "task_count": 1}]}`)) - })) - defer busyGatewayServer.Close() - - updater.gatewayURL = busyGatewayServer.URL - - // Reset to simulate new version available but gateway busy - updater.config.Updates.CurrentVersion = "v1.0.1" - - mockClient.EXPECT(). - GetLatestRelease(). - Return(&github.Release{TagName: "v1.0.2"}, nil) - - // Should not expect download calls because gateway is busy - updater.checkAndUpdate(ctx) - - // Version should remain unchanged - assert.Equal(t, "v1.0.1", updater.config.Updates.CurrentVersion) - assert.False(t, updater.versionMgr.IsVersionInstalled("v1.0.2")) -} - -// Test with shorter intervals but controlled timing -func TestAutoUpdater_TimedIntegration(t *testing.T) { - if testing.Short() { - t.Skip("Skipping timed integration test in short mode") - } - - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - // Create initial setup with very short interval for faster testing - cfg := createTestConfig(t, homeDir, "v1.0.0", true, 1) // 1 second interval - createMockBinary(t, homeDir, "v1.0.0") - - versionMgr := version.NewManager(homeDir) - require.NoError(t, versionMgr.SetCurrentVersion("v1.0.0")) - - // Setup mock gateway (idle) - gatewayServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(`{"running_tasks": []}`)) - })) - defer gatewayServer.Close() - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockClient := github.NewMockGithubClient(ctrl) - - // Expect multiple calls but control the sequence - callCount := 0 - mockClient.EXPECT(). - GetLatestRelease(). - DoAndReturn(func() (*github.Release, error) { - callCount++ - if callCount == 1 { - // First call - no update - return &github.Release{TagName: "v1.0.0"}, nil - } else if callCount == 2 { - // Second call - update available - return &github.Release{TagName: "v1.0.1"}, nil - } else { - // Subsequent calls - no more updates - return &github.Release{TagName: "v1.0.1"}, nil - } - }). - AnyTimes() - - // Expect download operations (will only be called once) - mockClient.EXPECT(). - GetSupernodeDownloadURL("v1.0.1"). - Return("https://example.com/binary", nil). - MaxTimes(1) - - mockClient.EXPECT(). - DownloadBinary(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(url, destPath string, progress func(int64, int64)) error { - content := "#!/bin/sh\necho 'mock supernode v1.0.1'\n" - if progress != nil { - progress(100, 100) - } - return ioutil.WriteFile(destPath, []byte(content), 0755) - }). - MaxTimes(1) - - // Create and start updater - updater := New(homeDir, cfg) - updater.githubClient = mockClient - updater.gatewayURL = gatewayServer.URL - - ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second) - defer cancel() - - // Start the updater - updater.Start(ctx) - - // Wait for update to complete - time.Sleep(3 * time.Second) - - // Stop the updater - updater.Stop() - - // Verify the update occurred - assert.Equal(t, "v1.0.1", updater.config.Updates.CurrentVersion) - assert.True(t, updater.versionMgr.IsVersionInstalled("v1.0.1")) - - // Verify restart marker - markerPath := filepath.Join(homeDir, ".needs_restart") - markerContent, err := ioutil.ReadFile(markerPath) - require.NoError(t, err) - assert.Equal(t, "v1.0.1", string(markerContent)) - - t.Logf("Total GetLatestRelease calls: %d", callCount) - assert.GreaterOrEqual(t, callCount, 2, "Should have made at least 2 calls") -} - -// TestAutoUpdater_UpdatePolicyLogic tests the update policy (only patch updates) -func TestAutoUpdater_UpdatePolicyLogic(t *testing.T) { - homeDir, cleanup := setupTestEnvironment(t) - defer cleanup() - - updateScenarios := []struct { - name string - currentVersion string - latestVersion string - shouldUpdate bool - description string - }{ - { - name: "patch_update_allowed", - currentVersion: "v1.2.3", - latestVersion: "v1.2.4", - shouldUpdate: true, - description: "Patch updates (1.2.3 -> 1.2.4) should be allowed", - }, - { - name: "minor_update_allowed", - currentVersion: "v1.2.3", - latestVersion: "v1.3.0", - shouldUpdate: true, - description: "Minor updates (1.2.x -> 1.3.x) should be allowed within same major version", - }, - { - name: "major_update_blocked", - currentVersion: "v1.2.3", - latestVersion: "v2.0.0", - shouldUpdate: false, - description: "Major updates (1.x.x -> 2.x.x) should be blocked", - }, - { - name: "same_version_no_update", - currentVersion: "v1.2.3", - latestVersion: "v1.2.3", - shouldUpdate: false, - description: "Same version should not trigger update", - }, - } - - for _, scenario := range updateScenarios { - t.Run(scenario.name, func(t *testing.T) { - cfg := createTestConfig(t, homeDir, scenario.currentVersion, true, 3600) - updater := New(homeDir, cfg) - - result := updater.ShouldUpdate(scenario.currentVersion, scenario.latestVersion) - assert.Equal(t, scenario.shouldUpdate, result, scenario.description) - }) - } -} diff --git a/sn-manager/internal/utils/version.go b/sn-manager/internal/utils/version.go index 7ec32a8e..b89d8b41 100644 --- a/sn-manager/internal/utils/version.go +++ b/sn-manager/internal/utils/version.go @@ -1,41 +1,142 @@ package utils import ( - "strconv" - "strings" + "strconv" + "strings" ) -// CompareVersions compares two version strings -// Returns: -1 if v1 < v2, 0 if v1 == v2, 1 if v1 > v2 +// CompareVersions compares two semantic versions (SemVer 2.0.0) +// - Handles leading 'v' +// - Ignores build metadata (after '+') +// - Properly compares pre-release identifiers (lower precedence than normal) +// Returns: -1 if v1 < v2, 0 if equal, 1 if v1 > v2 func CompareVersions(v1, v2 string) int { - v1 = strings.TrimPrefix(v1, "v") - v2 = strings.TrimPrefix(v2, "v") + p1 := parseSemver(v1) + p2 := parseSemver(v2) - parts1 := strings.Split(v1, ".") - parts2 := strings.Split(v2, ".") + // Compare core version + if p1.major != p2.major { + if p1.major < p2.major { + return -1 + } + return 1 + } + if p1.minor != p2.minor { + if p1.minor < p2.minor { + return -1 + } + return 1 + } + if p1.patch != p2.patch { + if p1.patch < p2.patch { + return -1 + } + return 1 + } + + // If core equal, pre-release precedence: absence > presence + if len(p1.prerelease) == 0 && len(p2.prerelease) == 0 { + return 0 + } + if len(p1.prerelease) == 0 { + return 1 + } + if len(p2.prerelease) == 0 { + return -1 + } + + // Compare pre-release identifiers + max := len(p1.prerelease) + if len(p2.prerelease) > max { + max = len(p2.prerelease) + } + for i := 0; i < max; i++ { + id1 := "" + id2 := "" + if i < len(p1.prerelease) { + id1 = p1.prerelease[i] + } + if i < len(p2.prerelease) { + id2 = p2.prerelease[i] + } - for i := 0; i < len(parts1) && i < len(parts2); i++ { - n1, err1 := strconv.Atoi(parts1[i]) - n2, err2 := strconv.Atoi(parts2[i]) - - if err1 != nil || err2 != nil { - return 0 + if id1 == id2 { + continue } - if n1 < n2 { + n1, e1 := strconv.Atoi(id1) + n2, e2 := strconv.Atoi(id2) + if e1 == nil && e2 == nil { + if n1 < n2 { + return -1 + } + return 1 + } + if e1 == nil && e2 != nil { + // Numeric identifiers have lower precedence than non-numeric return -1 } - if n1 > n2 { + if e1 != nil && e2 == nil { return 1 } + if id1 < id2 { + return -1 + } + return 1 } - if len(parts1) < len(parts2) { + // All identifiers equal; shorter set has lower precedence + if len(p1.prerelease) < len(p2.prerelease) { return -1 } - if len(parts1) > len(parts2) { + if len(p1.prerelease) > len(p2.prerelease) { return 1 } - return 0 -} \ No newline at end of file +} + +// SameMajor reports whether two versions have the same major component. +// It ignores leading 'v', build metadata, and pre-release suffixes when +// determining the major version. +func SameMajor(v1, v2 string) bool { + p1 := parseSemver(v1) + p2 := parseSemver(v2) + return p1.major == p2.major +} + +type semverParts struct { + major int + minor int + patch int + prerelease []string +} + +func parseSemver(v string) semverParts { + v = strings.TrimPrefix(v, "v") + // Strip build metadata + if i := strings.IndexByte(v, '+'); i >= 0 { + v = v[:i] + } + core := v + pre := "" + if i := strings.IndexByte(v, '-'); i >= 0 { + core = v[:i] + pre = v[i+1:] + } + maj, min, pat := 0, 0, 0 + parts := strings.Split(core, ".") + if len(parts) > 0 { + maj, _ = strconv.Atoi(parts[0]) + } + if len(parts) > 1 { + min, _ = strconv.Atoi(parts[1]) + } + if len(parts) > 2 { + pat, _ = strconv.Atoi(parts[2]) + } + var preIDs []string + if pre != "" { + preIDs = strings.Split(pre, ".") + } + return semverParts{major: maj, minor: min, patch: pat, prerelease: preIDs} +} diff --git a/sn-manager/internal/version/manager.go b/sn-manager/internal/version/manager.go index 18ae8002..5aa361b1 100644 --- a/sn-manager/internal/version/manager.go +++ b/sn-manager/internal/version/manager.go @@ -2,6 +2,8 @@ package version import ( "fmt" + "io" + "log" "os" "path/filepath" "sort" @@ -60,19 +62,37 @@ func (m *Manager) InstallVersion(version string, binaryPath string) error { destBinary := m.GetVersionBinary(version) tempBinary := destBinary + ".tmp" - // Copy binary to temp location first - input, err := os.ReadFile(binaryPath) + // Stream copy binary to temp location first to avoid high memory usage + src, err := os.Open(binaryPath) if err != nil { - return fmt.Errorf("failed to read binary: %w", err) + return fmt.Errorf("failed to open binary: %w", err) } + defer src.Close() - if err := os.WriteFile(tempBinary, input, 0755); err != nil { - return fmt.Errorf("failed to write binary: %w", err) + dst, err := os.OpenFile(tempBinary, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0755) + if err != nil { + return fmt.Errorf("failed to create temp binary: %w", err) + } + + if _, err := io.Copy(dst, src); err != nil { + dst.Close() + if rmErr := os.Remove(tempBinary); rmErr != nil && !os.IsNotExist(rmErr) { + log.Printf("Warning: failed to cleanup temp binary after copy error: %v", rmErr) + } + return fmt.Errorf("failed to copy binary: %w", err) + } + if err := dst.Close(); err != nil { + if rmErr := os.Remove(tempBinary); rmErr != nil && !os.IsNotExist(rmErr) { + log.Printf("Warning: failed to cleanup temp binary after close error: %v", rmErr) + } + return fmt.Errorf("failed to close temp binary: %w", err) } // Atomic rename if err := os.Rename(tempBinary, destBinary); err != nil { - os.Remove(tempBinary) + if rmErr := os.Remove(tempBinary); rmErr != nil && !os.IsNotExist(rmErr) { + log.Printf("Warning: failed to cleanup temp binary after rename error: %v", rmErr) + } return fmt.Errorf("failed to install binary: %w", err) } @@ -91,15 +111,19 @@ func (m *Manager) SetCurrentVersion(version string) error { // Create new symlink with temp name tempLink := currentLink + ".tmp" - os.Remove(tempLink) // cleanup any leftover - + if err := os.Remove(tempLink); err != nil && !os.IsNotExist(err) { + log.Printf("Warning: failed to remove existing temp link: %v", err) + } + if err := os.Symlink(targetDir, tempLink); err != nil { return fmt.Errorf("failed to create symlink: %w", err) } // Atomic rename if err := os.Rename(tempLink, currentLink); err != nil { - os.Remove(tempLink) + if rmErr := os.Remove(tempLink); rmErr != nil && !os.IsNotExist(rmErr) { + log.Printf("Warning: failed to cleanup temp link: %v", rmErr) + } return fmt.Errorf("failed to update symlink: %w", err) } @@ -139,11 +163,7 @@ func (m *Manager) ListVersions() ([]string, error) { var versions []string for _, entry := range entries { if entry.IsDir() { - // Check if it contains a supernode binary - binaryPath := filepath.Join(binariesDir, entry.Name(), "supernode") - if _, err := os.Stat(binaryPath); err == nil { - versions = append(versions, entry.Name()) - } + versions = append(versions, entry.Name()) } } @@ -154,4 +174,3 @@ func (m *Manager) ListVersions() ([]string, error) { return versions, nil } -