diff --git a/README.md b/README.md index 48b9ff5..c91b022 100644 --- a/README.md +++ b/README.md @@ -13,14 +13,14 @@ Sekin orchestrates multiple microservices in a containerized environment: │ Sekin Infrastructure │ ├─────────────────────────────────────────────────────────────┤ │ │ -│ ┌──────────┐ ┌──────────┐ ┌───────────────────────┐ │ -│ │ Sekai │ │ Shidai │ │ Interx Manager │ │ -│ │ (Cosmos) │ │ (Infra) │ │ (P2P/HTTP Server) │ │ -│ └────┬─────┘ └────┬─────┘ └───────────┬───────────┘ │ -│ │ │ │ │ -│ ┌────┴─────────────┴─────────────────────┴───────────┐ │ -│ │ Centralized Logging (Syslog-ng) │ │ -│ └────────────────────────────────────────────────────┘ │ +│ ┌────────────────────┐ ┌───────────────────────┐ │ +│ │ Sekai + Scaller │ │ Interx Manager │ │ +│ │ (Cosmos + CLI) │ │ (P2P/HTTP Server) │ │ +│ └─────────┬──────────┘ └───────────┬───────────┘ │ +│ │ │ │ +│ ┌─────────┴──────────────────────────┴───────────┐ │ +│ │ Centralized Logging (Syslog-ng) │ │ +│ └────────────────────────────────────────────────┘ │ │ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ │ Storage │ │ Proxy │ │ MongoDB │ │ @@ -40,18 +40,13 @@ Sekin orchestrates multiple microservices in a containerized environment: ### Core Services -**Sekai (v0.4.13)** +**Sekai with Scaller** - KIRA blockchain node built on Cosmos SDK -- Provides consensus, state management, and transaction processing +- Includes scaller CLI for node bootstrapping, management, and monitoring - Ports: 26657 (RPC), 26656 (P2P), 9090 (gRPC), 1317 (REST API) +- Built from `sekai.Dockerfile` (contains sekaid + scaller) -**Shidai (v0.15.2)** -- Infrastructure management and orchestration service -- Monitors blockchain status and manages container lifecycle -- Provides API endpoint on port 8282 -- Built from source in `src/shidai/` - -**Syslog-ng (v0.15.2)** +**Syslog-ng** - Centralized logging server for all services - Collects logs via UDP/TCP on port 514 - Configured with log rotation and retention policies @@ -107,7 
+102,6 @@ All services run on a custom bridge network `kiranet` (10.1.0.0/16): | Syslog-ng | 10.1.0.2 | syslog-ng.local | | Sekai | 10.1.0.3 | sekai.local | | Manager | 10.1.0.4 | manager.local | -| Shidai | 10.1.0.5 | shidai.local | | Proxy | 10.1.0.10 | proxy.local | | Storage | 10.1.0.11 | storage.local | | Cosmos Indexer | 10.1.0.12 | cosmos-indexer.local | @@ -191,34 +185,18 @@ docker compose logs -f sekai tail -f ./syslog-data/sekai.log ``` -## Building Images Independently - -Apart from using Docker Compose, you can build Sekai and Interx independently. +## Building Images -### Build Sekai +Build the sekai image (includes sekaid + scaller): ```bash -./scripts/docker-sekaid-build.sh v0.4.13 +docker build -f sekai.Dockerfile -t sekai:latest . ``` -### Build Interx +Build the syslog-ng image: ```bash -./scripts/docker-interxd-build.sh v0.7.0 -``` - -### Run Containers - -**Run Sekai:** - -```bash -./scripts/docker-sekaid-run.sh v0.4.13 -``` - -**Run Interx:** - -```bash -./scripts/docker-interxd-run.sh v0.7.0 +docker build -f syslog-ng.Dockerfile -t syslog-ng:latest . ``` ## Port Mappings @@ -243,11 +221,9 @@ Apart from using Docker Compose, you can build Sekai and Interx independently. 
|-------|------------|-----------------------------| | 26658 | Sekai | ABCI | | 26660 | Sekai | Prometheus Metrics | -| 8181 | Sekai | RPC sCaller | | 1317 | Sekai | REST API | | 9090 | Sekai | gRPC | | 8080 | Manager | HTTP Server | -| 8282 | Shidai | Infrastructure Manager API | | 514 | Syslog-ng | Syslog Server (UDP/TCP) | ## Configuration @@ -274,7 +250,7 @@ Sekin uses GitHub Actions for automated image building, signing, and deployment: **`ci.yml` - Build and Release** - Triggers on PR merge to `main` branch - Creates semantic version tags -- Builds and pushes Shidai and Syslog-ng images to GHCR +- Builds and pushes Sekai and Syslog-ng images to GHCR - Signs images with Cosign (Sigstore) - Updates `compose.yml` with new versions @@ -294,7 +270,77 @@ Sekin uses GitHub Actions for automated image building, signing, and deployment: All images are signed using Cosign for supply chain security. Verify signatures: ```bash -cosign verify --key cosign.pub ghcr.io/kiracore/sekin/shidai:v0.15.2 +cosign verify --key cosign.pub ghcr.io/kiracore/sekin/sekai:latest +``` + +## Scaller CLI + +Scaller is a Go CLI tool bundled in the sekai container for node bootstrapping, management, and monitoring. 
+ +### Commands + +| Command | Description | +|---------|-------------| +| `wait` | Wait indefinitely (container entrypoint) | +| `init` | Initialize new sekaid node | +| `keys-add` | Add key to keyring | +| `add-genesis-account` | Add account to genesis | +| `gentx-claim` | Claim validator role in genesis | +| `join` | Initialize node and join existing network | +| `start` | Start sekaid (with optional restart) | +| `status` | Show node and network status | +| `version` | Show scaller version | + +### Usage Examples + +```bash +# Initialize a new node +docker exec sekin-sekai-1 /scaller init --chain-id testnet-1 --moniker MyNode + +# Join an existing network +docker exec sekin-sekai-1 /scaller join \ + --chain-id kira-1 \ + --rpc "https://rpc.kira.network:26657" \ + --moniker MyNode + +# Start sekaid (replaces process) +docker exec sekin-sekai-1 /scaller start + +# Start with auto-restart (up to 5 retries) +docker exec sekin-sekai-1 /scaller start --restart 5 + +# Start with auto-restart (max 10 retries) +docker exec sekin-sekai-1 /scaller start --restart always + +# Check node status (defaults: rpc=localhost:26657, interx=proxy.local:8080) +docker exec sekin-sekai-1 /scaller status +``` + +### Status Output + +The `status` command displays a table showing: + +| Field | Description | +|-------|-------------| +| Sekai | Node health (OK/SYNCING/DOWN) and block height | +| Interx | Interx service health | +| Peers | Number of connected peers | +| Node ID | Unique node identifier (for peer connections) | +| Chain | Network chain ID | +| Moniker | Node's moniker name | +| Validator | Validator status and voting power | + +Example output: +``` +SERVICE STATUS DETAIL +---------- ------- ------------------------------ +Sekai [+] OK height 12345 +Interx [+] OK responding +Peers [+] OK 5 connected +Node ID [ ] INFO a1b2c3d4e5f6... 
+Chain [ ] INFO kira-1 +Moniker [ ] INFO MyNode +Validator [+] OK power 100 ``` ## Monitoring and Maintenance @@ -302,11 +348,14 @@ cosign verify --key cosign.pub ghcr.io/kiracore/sekin/shidai:v0.15.2 ### View Service Status ```bash -# Check Shidai status -curl http://localhost:8282/status - # Check blockchain status via RPC curl http://localhost:26657/status + +# Check node status with scaller +docker exec sekin-sekai-1 /scaller status + +# Check scaller version +docker exec sekin-sekai-1 /scaller version ``` ### Access Logs @@ -348,11 +397,7 @@ docker compose up -d ``` sekin/ ├── src/ -│ ├── shidai/ # Infrastructure manager -│ ├── sCaller/ # Sekai command executor -│ ├── iCaller/ # Interx command executor -│ ├── exporter/ # Metrics exporter -│ └── updater/ # Upgrade manager +│ └── sCaller/ # Scaller CLI (node bootstrap & management) ├── manager/ # Interx Manager (P2P/HTTP) ├── proxy/ # Interx Proxy ├── worker/ # Interx Worker services @@ -361,6 +406,8 @@ sekin/ │ └── sai-storage-mongo/ # Storage service ├── scripts/ # Utility scripts ├── config/ # Configuration files +├── sekai.Dockerfile # Sekai + Scaller image +├── syslog-ng.Dockerfile # Syslog-ng image └── compose.yml # Production compose file ``` @@ -369,8 +416,8 @@ sekin/ Each component can be built independently using its respective Dockerfile: ```bash -# Build Shidai -docker build -f shidai.Dockerfile -t sekin/shidai:custom . +# Build Sekai (includes scaller CLI) +docker build -f sekai.Dockerfile -t sekin/sekai:custom . # Build Syslog-ng docker build -f syslog-ng.Dockerfile -t sekin/syslog-ng:custom . 
@@ -400,7 +447,7 @@ docker network inspect kiranet Check for existing processes using required ports: ```bash -sudo netstat -tulpn | grep -E '26657|26656|8080|8282' +sudo netstat -tulpn | grep -E '26657|26656|8080' ``` ### Disk Space diff --git a/src/sCaller/internal/cli/root.go b/src/sCaller/internal/cli/root.go index 8fcfd7e..4ab8ffb 100644 --- a/src/sCaller/internal/cli/root.go +++ b/src/sCaller/internal/cli/root.go @@ -19,7 +19,8 @@ Commands: add-genesis-account - Add account to genesis gentx-claim - Claim validator role in genesis join - Initialize node and join existing network - start - Start sekaid (replaces this process)`, + start - Start sekaid (with optional restart) + status - Show node and network status`, } func Execute() error { @@ -34,6 +35,7 @@ func init() { rootCmd.AddCommand(gentxClaimCmd) rootCmd.AddCommand(joinCmd) rootCmd.AddCommand(startCmd) + rootCmd.AddCommand(statusCmd) rootCmd.AddCommand(versionCmd) } diff --git a/src/sCaller/internal/cli/start.go b/src/sCaller/internal/cli/start.go index e07fbb9..76c6723 100644 --- a/src/sCaller/internal/cli/start.go +++ b/src/sCaller/internal/cli/start.go @@ -1,8 +1,11 @@ package cli import ( + "fmt" "os" + "os/exec" "syscall" + "time" "github.com/spf13/cobra" ) @@ -11,28 +14,111 @@ const sekaidPath = "/sekaid" var startCmd = &cobra.Command{ Use: "start", - Short: "Start sekaid (replaces this process)", - Long: `Starts sekaid using syscall.Exec, replacing this process entirely.`, - Run: runStart, + Short: "Start sekaid", + Long: `Starts sekaid. By default uses syscall.Exec to replace this process. + +With --restart flag, runs sekaid as a subprocess and restarts on failure. 
+ +Examples: + scaller start # Start once (replaces process) + scaller start --restart 5 # Restart up to 5 times on failure + scaller start --restart always # Restart indefinitely (max 10 retries)`, + Run: runStart, } -var startHome string +var ( + startHome string + startRestart string +) func init() { startCmd.Flags().StringVar(&startHome, "home", "/sekai", "sekaid home directory") + startCmd.Flags().StringVar(&startRestart, "restart", "", "Restart on failure: number (1-10) or 'always' (max 10)") } func runStart(cmd *cobra.Command, args []string) { Log("Starting sekaid with home=%s", startHome) + // If restart is not set, use syscall.Exec (original behavior) + if startRestart == "" { + execSekaid() + return + } + + // Parse restart mode + maxRetries := parseRestartMode(startRestart) + Log("Restart mode enabled: max %d retries", maxRetries) + + runWithRestart(maxRetries) +} + +// execSekaid replaces the current process with sekaid +func execSekaid() { argv := []string{"sekaid", "start", "--home", startHome} env := os.Environ() - // This replaces the current process with sekaid err := syscall.Exec(sekaidPath, argv, env) if err != nil { Fatal("Failed to exec sekaid: %v", err) } +} + +// parseRestartMode parses the restart flag value +func parseRestartMode(mode string) int { + if mode == "always" { + return 10 // "always" means max 10 retries + } + + // Parse as number + var n int + _, err := fmt.Sscanf(mode, "%d", &n) + if err != nil || n < 1 { + Fatal("Invalid restart value: %s (use 1-10 or 'always')", mode) + } + if n > 10 { + n = 10 + } + return n +} + +// runWithRestart runs sekaid as a subprocess with restart logic +func runWithRestart(maxRetries int) { + retryCount := 0 + backoffSeconds := []int{1, 2, 5, 10, 15, 30, 30, 30, 30, 30} // Progressive backoff + + for { + Log("Starting sekaid (attempt %d/%d)...", retryCount+1, maxRetries) + + cmd := exec.Command(sekaidPath, "start", "--home", startHome) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = 
os.Stdin - // This line is never reached if Exec succeeds + startTime := time.Now() + err := cmd.Run() + runDuration := time.Since(startTime) + + if err == nil { + Log("sekaid exited normally") + return + } + + retryCount++ + Log("sekaid exited with error: %v (ran for %v)", err, runDuration) + + // If it ran for more than 60 seconds, reset retry count (was stable) + if runDuration > 60*time.Second { + Log("Node was stable, resetting retry count") + retryCount = 1 + } + + if retryCount >= maxRetries { + Fatal("Max retries (%d) exceeded, giving up", maxRetries) + } + + // Backoff before retry + backoff := backoffSeconds[retryCount-1] + Log("Waiting %d seconds before retry...", backoff) + time.Sleep(time.Duration(backoff) * time.Second) + } } diff --git a/src/sCaller/internal/cli/status.go b/src/sCaller/internal/cli/status.go new file mode 100644 index 0000000..9c4bc48 --- /dev/null +++ b/src/sCaller/internal/cli/status.go @@ -0,0 +1,225 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + "github.com/spf13/cobra" +) + +var statusCmd = &cobra.Command{ + Use: "status", + Short: "Show node and network status", + Long: `Displays a concise status table showing sekai, interx, and network health.`, + Run: runStatus, +} + +var ( + statusRPCAddr string + statusInterxAddr string +) + +func init() { + statusCmd.Flags().StringVar(&statusRPCAddr, "rpc", "http://localhost:26657", "Sekai RPC address") + statusCmd.Flags().StringVar(&statusInterxAddr, "interx", "http://proxy.local:8080", "Interx address") +} + +// Status check results +type statusResult struct { + Name string + Status string + Detail string +} + +func runStatus(cmd *cobra.Command, args []string) { + results := []statusResult{} + + // Check Sekai RPC + sekaiStatus, sekaiDetail := checkSekai(statusRPCAddr) + results = append(results, statusResult{"Sekai", sekaiStatus, sekaiDetail}) + + // Check Interx + interxStatus, interxDetail := checkInterx(statusInterxAddr) + results = 
append(results, statusResult{"Interx", interxStatus, interxDetail}) + + // Get network info from Sekai + netStatus := getNetworkStatus(statusRPCAddr) + results = append(results, netStatus...) + + // Print table + printStatusTable(results) +} + +func checkSekai(rpcAddr string) (string, string) { + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Get(rpcAddr + "/status") + if err != nil { + return "DOWN", err.Error() + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "ERROR", fmt.Sprintf("HTTP %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "ERROR", "Failed to read response" + } + + var status struct { + Result struct { + SyncInfo struct { + CatchingUp bool `json:"catching_up"` + LatestBlockHeight string `json:"latest_block_height"` + LatestBlockTime string `json:"latest_block_time"` + } `json:"sync_info"` + } `json:"result"` + } + + if err := json.Unmarshal(body, &status); err != nil { + return "ERROR", "Invalid response" + } + + if status.Result.SyncInfo.CatchingUp { + return "SYNCING", fmt.Sprintf("height %s", status.Result.SyncInfo.LatestBlockHeight) + } + + return "OK", fmt.Sprintf("height %s", status.Result.SyncInfo.LatestBlockHeight) +} + +func checkInterx(addr string) (string, string) { + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Get(addr + "/api/status") + if err != nil { + return "DOWN", err.Error() + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "ERROR", fmt.Sprintf("HTTP %d", resp.StatusCode) + } + + return "OK", "responding" +} + +func getNetworkStatus(rpcAddr string) []statusResult { + results := []statusResult{} + client := &http.Client{Timeout: 5 * time.Second} + + // Get net_info for peers + resp, err := client.Get(rpcAddr + "/net_info") + if err != nil { + results = append(results, statusResult{"Peers", "N/A", "cannot fetch"}) + } else { + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + + var netInfo 
struct { + Result struct { + NPeers string `json:"n_peers"` + Peers []struct { + NodeInfo struct { + Moniker string `json:"moniker"` + } `json:"node_info"` + } `json:"peers"` + } `json:"result"` + } + + if err := json.Unmarshal(body, &netInfo); err == nil { + peerCount := netInfo.Result.NPeers + if peerCount == "" { + peerCount = fmt.Sprintf("%d", len(netInfo.Result.Peers)) + } + status := "OK" + if peerCount == "0" { + status = "WARN" + } + results = append(results, statusResult{"Peers", status, fmt.Sprintf("%s connected", peerCount)}) + } + } + + // Get consensus state + resp2, err := client.Get(rpcAddr + "/status") + if err == nil { + defer resp2.Body.Close() + body, _ := io.ReadAll(resp2.Body) + + var status struct { + Result struct { + NodeInfo struct { + ID string `json:"id"` + Network string `json:"network"` + Moniker string `json:"moniker"` + } `json:"node_info"` + ValidatorInfo struct { + VotingPower string `json:"voting_power"` + } `json:"validator_info"` + } `json:"result"` + } + + if err := json.Unmarshal(body, &status); err == nil { + results = append(results, statusResult{"Node ID", "INFO", status.Result.NodeInfo.ID}) + results = append(results, statusResult{"Chain", "INFO", status.Result.NodeInfo.Network}) + results = append(results, statusResult{"Moniker", "INFO", status.Result.NodeInfo.Moniker}) + + vp := status.Result.ValidatorInfo.VotingPower + if vp != "" && vp != "0" { + results = append(results, statusResult{"Validator", "OK", fmt.Sprintf("power %s", vp)}) + } else { + results = append(results, statusResult{"Validator", "INFO", "not active"}) + } + } + } + + return results +} + +func printStatusTable(results []statusResult) { + // Calculate column widths + maxName := 10 + maxStatus := 7 + for _, r := range results { + if len(r.Name) > maxName { + maxName = len(r.Name) + } + if len(r.Status) > maxStatus { + maxStatus = len(r.Status) + } + } + + // Print header + fmt.Printf("\n%-*s %-*s %s\n", maxName, "SERVICE", maxStatus, "STATUS", "DETAIL") + 
// getStatusIcon maps a status keyword to the three-character marker shown in
// the status table: "[+]" for OK, "[X]" for DOWN or ERROR, "[!]" for WARN or
// SYNCING, and "[ ]" for anything else (for example INFO or N/A).
func getStatusIcon(status string) string {
	switch status {
	case "OK":
		return "[+]"
	case "DOWN", "ERROR":
		return "[X]"
	case "WARN", "SYNCING":
		return "[!]"
	default:
		return "[ ]"
	}
}

// repeat returns s concatenated n times. It returns "" when n is zero or
// negative, or when s is empty.
func repeat(s string, n int) string {
	if n <= 0 || s == "" {
		return ""
	}
	// Fill a pre-sized buffer once; appending with += in a loop would copy
	// the growing string on every iteration.
	buf := make([]byte, 0, len(s)*n)
	for i := 0; i < n; i++ {
		buf = append(buf, s...)
	}
	return string(buf)
}