Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a command to start TCM #1127

Merged
merged 1 commit into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]

### Added
- `tt aeon connect` added tests for connect file/app.

- `tt aeon connect` added tests for connect file/app.
- `tt pack`: support `.packignore` file to specify files that should not be included
in package (works the same as `.gitignore`).
- `tt tcm start`: add the tcm command.
- `tt tcm start` OR `tt tcm start --path /path/to/tcm`: added the capability to run TCM in interactive mode.
- `tt tcm start --watchdog`: implemented Watchdog mode for automatic restarting of TCM upon unexpected termination.

### Changed

Expand Down Expand Up @@ -708,4 +711,4 @@ Additionally, several fixes were implemented to improve stability.
- Module ``tt create``, to create an application from a template.
- Module ``tt build``, to build an application.
- Module ``tt install``, to install tarantool/tt.
- Module ``tt remove``, to remove tarantool/tt.
- Module ``tt remove``, to remove tarantool/tt.
1 change: 1 addition & 0 deletions cli/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ After that tt will be able to manage the application using 'replicaset_example'
NewLogCmd(),
NewEnableCmd(),
NewAeonCmd(),
NewTcmCmd(),
)
if err := injectCmds(rootCmd); err != nil {
panic(err.Error())
Expand Down
97 changes: 97 additions & 0 deletions cli/cmd/tcm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package cmd

import (
"errors"
"os"
"os/exec"
"time"

"github.com/spf13/cobra"
"github.com/tarantool/tt/cli/cmdcontext"
"github.com/tarantool/tt/cli/modules"
tcmCmd "github.com/tarantool/tt/cli/tcm"
"github.com/tarantool/tt/cli/util"
)

// tcmCtx holds the options of the `tt tcm` commands. Its Executable and
// Watchdog fields are bound to the `--path` and `--watchdog` flags of
// `tt tcm start`.
var tcmCtx = tcmCmd.TcmCtx{}

// newTcmStartCmd builds the `start` subcommand of `tt tcm`.
//
// The command registers two flags that are stored in the package-level tcmCtx:
// `--path` (string, path to the tcm binary) and `--watchdog` (bool, enables the
// watchdog). When executed it dispatches to internalStartTcm through
// modules.RunCmd and reports any error with util.HandleCmdErr.
func newTcmStartCmd() *cobra.Command {
	// Named startCmd (not tcmCmd) so the imported tcmCmd package alias is not shadowed.
	startCmd := &cobra.Command{
		Use:   "start",
		Short: "Start tcm application",
		Long: `Start to the tcm.
tt tcm start --watchdog
tt tcm start --path`,
		Run: func(cmd *cobra.Command, args []string) {
			cmdCtx.CommandName = cmd.Name()
			util.HandleCmdErr(cmd, modules.RunCmd(&cmdCtx, cmd.CommandPath(), &modulesInfo,
				internalStartTcm, args))
		},
	}

	flags := startCmd.Flags()
	flags.StringVar(&tcmCtx.Executable, "path", "", "the path to the tcm binary file")
	flags.BoolVar(&tcmCtx.Watchdog, "watchdog", false, "enables the watchdog")

	return startCmd
}

// NewTcmCmd creates the `tcm` command and attaches its `start` subcommand.
func NewTcmCmd() *cobra.Command {
	root := &cobra.Command{
		Use:   "tcm",
		Short: "Manage tcm application",
	}
	root.AddCommand(newTcmStartCmd())

	return root
}

// startTcmInteractive runs the binary named by tcmCtx.Executable in the
// foreground, forwarding its stdout and stderr to those of the current
// process. It blocks until the child exits and returns the error reported by
// exec.Cmd.Run, or nil on a clean exit.
func startTcmInteractive() error {
	proc := exec.Command(tcmCtx.Executable)
	proc.Stdout, proc.Stderr = os.Stdout, os.Stderr

	return proc.Run()
}

// startTcmUnderWatchDog runs the binary named by tcmCtx.Executable under a
// watchdog created with a 5 second restart timeout. It returns the error from
// creating the watchdog or from Watchdog.Start.
func startTcmUnderWatchDog() error {
	watchdog, err := tcmCmd.NewWatchdog(5 * time.Second)
	if err != nil {
		return err
	}

	return watchdog.Start(tcmCtx.Executable)
}

// internalStartTcm is the module entry point of `tt tcm start`.
//
// It refuses to start when no tarantool binary is known. The tcm binary is
// taken from the `--path` flag (tcmCtx.Executable) when it was given and from
// cmdCtx.Cli.TcmCli.Executable otherwise; if neither is set an error is
// returned. Depending on tcmCtx.Watchdog, tcm is then run either under the
// watchdog or in the foreground - never both.
//
// args is accepted to satisfy the module callback signature and is not used.
func internalStartTcm(cmdCtx *cmdcontext.CmdCtx, args []string) error {
	if cmdCtx.Cli.TarantoolCli.Executable == "" {
		return errors.New("cannot start: tarantool binary is not found")
	}

	// An explicit --path must win; previously it was unconditionally
	// overwritten by the value from the CLI context, making the flag a no-op.
	if tcmCtx.Executable == "" {
		tcmCtx.Executable = cmdCtx.Cli.TcmCli.Executable
	}
	if tcmCtx.Executable == "" {
		return errors.New("cannot start: tcm binary is not found")
	}

	// The two modes are mutually exclusive. Previously a successful
	// interactive run fell through and started tcm again under the watchdog.
	if tcmCtx.Watchdog {
		return startTcmUnderWatchDog()
	}

	return startTcmInteractive()
}
6 changes: 6 additions & 0 deletions cli/tcm/tcm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package tcm

// TcmCtx holds the parameters used to start TCM.
type TcmCtx struct {
// Executable is the path to the tcm binary to run.
Executable string
// Watchdog tells whether tcm is started under the watchdog.
Watchdog bool
}
158 changes: 158 additions & 0 deletions cli/tcm/watchdog.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package tcm

import (
"context"
"errors"
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"path/filepath"
"sync"
"syscall"
"time"
)

// Watchdog manages the lifecycle of a process.
// Watchdog manages the lifecycle of a process: Start launches it and relaunches
// it after it exits, Stop asks it to terminate and ends the restart loop.
type Watchdog struct {
// The command currently being executed and monitored; replaced on every (re)start.
cmd *exec.Cmd
// Time to wait after the process exits before restarting it.
restartTimeout time.Duration
// Flag set by Stop to tell the Start loop to finish instead of restarting.
shouldStop bool
// Mutex to protect access to shouldStop.
stopMutex sync.Mutex
// WaitGroup that Start registers with while it runs; Stop waits on it.
doneBarrier sync.WaitGroup
// Path of the file the PID of the started process is written to.
pidFile string
}

// NewWatchdog creates a new Watchdog instance.
// NewWatchdog returns a Watchdog that waits restartTimeout between an exit of
// the monitored process and its next start. The PID file location is fixed to
// "tcm/pidFile.pid". The returned error is always nil.
func NewWatchdog(restartTimeout time.Duration) (*Watchdog, error) {
	wd := new(Watchdog)
	wd.restartTimeout = restartTimeout
	wd.pidFile = "tcm/pidFile.pid"

	return wd, nil
}

// Start starts the process and monitors its execution.
// Start runs bin with args and keeps restarting it until the Watchdog is
// stopped.
//
// Each iteration launches the process with stdout/stderr forwarded to the
// current process, records its PID in the PID file, and waits for it to exit.
// Unless a stop was requested, the process is started again after
// restartTimeout, regardless of whether it exited cleanly or with a non-zero
// status.
//
// Start blocks and returns:
//   - nil once a stop has been requested (via Stop or SIGINT/SIGTERM);
//   - the error from starting the process or writing the PID file;
//   - the error from Wait when it is not an *exec.ExitError.
func (wd *Watchdog) Start(bin string, args ...string) error {
	wd.doneBarrier.Add(1)
	defer wd.doneBarrier.Done()

	signalCtx, signalCancel := context.WithCancel(context.Background())
	defer signalCancel()

	go wd.handleSignals(signalCtx, signalCancel)

	for {
		cmd := exec.Command(bin, args...)
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr

		// The stop check, the launch and the publication of wd.cmd happen under
		// one lock. Otherwise a Stop arriving between the check and the launch
		// would signal nothing (or a stale process) and then wait for a child
		// that nobody asked to terminate. It also keeps the write to wd.cmd
		// from racing with the read in Stop.
		wd.stopMutex.Lock()
		if wd.shouldStop {
			wd.stopMutex.Unlock()
			return nil
		}
		log.Println("(INFO): Starting process...")
		if err := cmd.Start(); err != nil {
			wd.stopMutex.Unlock()
			log.Printf("(ERROR): Failed to start process: %v\n", err)
			return err
		}
		wd.cmd = cmd
		wd.stopMutex.Unlock()

		if err := wd.writePIDToFile(); err != nil {
			log.Printf("(ERROR): Failed to write PID to file: %v\n", err)
			// Do not leave an unmonitored child behind. Errors are ignored on
			// purpose: the process may already be gone and the PID file error
			// is the one worth reporting.
			_ = cmd.Process.Kill()
			_ = cmd.Wait()
			return err
		}

		if err := cmd.Wait(); err != nil {
			var exitErr *exec.ExitError
			if !errors.As(err, &exitErr) {
				log.Printf("(ERROR): Process failed: %v\n", err)
				return err
			}
			// A non-zero exit status or death by signal is an expected reason
			// to restart, not a watchdog failure.
			log.Printf("(WARN): Process exited with error: %v\n", exitErr)
		} else {
			log.Println("(INFO): Process completed successfully.")
		}

		wd.stopMutex.Lock()
		stopped := wd.shouldStop
		wd.stopMutex.Unlock()
		if stopped {
			return nil
		}

		log.Printf("(INFO): Waiting for %s before restart...\n", wd.restartTimeout)
		time.Sleep(wd.restartTimeout)
	}
}

// Stop stops the process and shuts down the Watchdog.
// Stop requests termination of the monitored process and shuts the Watchdog
// down.
//
// It marks the Watchdog as stopped, sends SIGTERM to the current process (if
// any), waits for Start to return and then removes the PID file. The directory
// that contained the PID file is removed only if it is empty afterwards.
func (wd *Watchdog) Stop() {
	wd.stopMutex.Lock()
	wd.shouldStop = true
	if wd.cmd != nil && wd.cmd.Process != nil {
		log.Println("(INFO): Stopping process...")
		err := wd.cmd.Process.Signal(syscall.SIGTERM)
		// The process may already have exited (e.g. Stop was called while the
		// watchdog was waiting to restart it); that is not a failure.
		if err != nil && !errors.Is(err, os.ErrProcessDone) {
			log.Printf("(ERROR): Failed to stop process: %v\n", err)
		}
	}
	wd.stopMutex.Unlock()

	wd.doneBarrier.Wait()

	// Remove only what the watchdog created. Recursively deleting the parent
	// directory would wipe unrelated files when the PID file lives in a shared
	// location such as /tmp.
	if err := os.Remove(wd.pidFile); err != nil && !errors.Is(err, os.ErrNotExist) {
		log.Printf("(WARN): Failed to remove PID file: %v\n", err)
	}
	// Best effort: os.Remove refuses to delete a non-empty directory, which is
	// exactly the case where the directory must be kept, so the error is ignored.
	_ = os.Remove(filepath.Dir(wd.pidFile))

	log.Println("(INFO): Watchdog stopped.")
}

// handleSignals listens for OS signals and stops the Watchdog gracefully.
// handleSignals listens for SIGINT/SIGTERM and stops the Watchdog gracefully.
//
// It returns either after a signal was received and handled (Stop is called,
// then cancel), or as soon as ctx is cancelled. In both cases the signal
// subscription is released on return.
func (wd *Watchdog) handleSignals(ctx context.Context, cancel context.CancelFunc) {
	signalChan := make(chan os.Signal, 1)
	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
	// Without this the runtime keeps delivering SIGINT/SIGTERM to a channel
	// nobody reads after the watchdog has finished.
	defer signal.Stop(signalChan)

	select {
	case <-signalChan:
		log.Println("(INFO): Received stop signal.")
		wd.Stop()
		cancel()
	case <-ctx.Done():
		return
	}
}

// writePIDToFile writes the PID of the process to a file.
// writePIDToFile writes the PID of the current process to wd.pidFile as a
// decimal string, creating the parent directory if needed and truncating any
// existing file.
//
// It returns an error when no process has been started, or when creating the
// directory, creating the file, writing to it or closing it fails.
func (wd *Watchdog) writePIDToFile() error {
	if wd.cmd == nil || wd.cmd.Process == nil {
		return errors.New("process is not running")
	}

	pid := wd.cmd.Process.Pid
	pidData := fmt.Sprintf("%d", pid)

	dir := filepath.Dir(wd.pidFile)
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		return err
	}

	file, err := os.Create(wd.pidFile)
	if err != nil {
		// %w keeps the underlying error available to errors.Is / errors.As.
		return fmt.Errorf("failed to create PID file: %w", err)
	}

	if _, err := file.WriteString(pidData); err != nil {
		// The write error is the one to report; a Close error here adds nothing.
		_ = file.Close()
		return err
	}
	// Close can report a delayed write failure, so it must be checked before
	// claiming the PID was written.
	if err := file.Close(); err != nil {
		return err
	}

	log.Printf("(INFO): PID %d written to %s\n", pid, wd.pidFile)
	return nil
}
69 changes: 69 additions & 0 deletions cli/tcm/watchdog_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package tcm

import (
"fmt"
"os"
"os/exec"
"testing"
"time"

"github.com/stretchr/testify/require"
)

// TestWatchdogStartProcess checks that a started watchdog creates its PID file
// and that Start returns without error once the watchdog is stopped.
func TestWatchdogStartProcess(t *testing.T) {
	watchdog, err := NewWatchdog(1 * time.Second)
	require.NoError(t, err)

	// The result of Start is handed back to the test goroutine: require calls
	// t.FailNow, which must not be called from any other goroutine.
	startErr := make(chan error, 1)
	go func() {
		startErr <- watchdog.Start("sleep", "5")
	}()

	time.Sleep(2 * time.Second)

	_, statErr := os.Stat(watchdog.pidFile)

	// Stop before asserting so a failed assertion does not leave the watchdog
	// and its child process running.
	watchdog.Stop()

	require.NoError(t, statErr)
	require.NoError(t, <-startErr)
}

// TestWatchdogRestartProcess runs a short-lived process under the watchdog for
// long enough that it has exited at least once, then checks that the PID file
// exists and that Start returns without error once the watchdog is stopped.
func TestWatchdogRestartProcess(t *testing.T) {
	watchdog, err := NewWatchdog(1 * time.Second)
	require.NoError(t, err)

	// The result of Start is handed back to the test goroutine: require calls
	// t.FailNow, which must not be called from any other goroutine.
	startErr := make(chan error, 1)
	go func() {
		startErr <- watchdog.Start("sleep", "1")
	}()

	time.Sleep(3 * time.Second)

	_, statErr := os.Stat(watchdog.pidFile)

	// Stop before asserting so a failed assertion does not leave the watchdog
	// restarting its child in the background.
	watchdog.Stop()

	require.NoError(t, statErr)
	require.NoError(t, <-startErr)
}

// TestWritePIDToFile checks that writePIDToFile stores the PID of the started
// process in the configured file as a decimal string.
func TestWritePIDToFile(t *testing.T) {
	// Use a per-test temporary directory instead of a fixed /tmp path so that
	// concurrent or repeated runs cannot collide; the testing package removes
	// it when the test ends. The file starts empty and is overwritten by
	// writePIDToFile.
	tmp, err := os.CreateTemp(t.TempDir(), "watchdog_test_*.pid")
	require.NoError(t, err)
	require.NoError(t, tmp.Close())
	pidFile := tmp.Name()

	cmd := exec.Command("sleep", "1")
	err = cmd.Start()
	require.NoError(t, err)
	defer func() {
		// Kill and reap the child so no zombie outlives the test. Errors are
		// ignored: the process may already have exited on its own.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
	}()

	watchdog := &Watchdog{
		cmd:     cmd,
		pidFile: pidFile,
	}

	err = watchdog.writePIDToFile()
	require.NoError(t, err)

	pidData, err := os.ReadFile(pidFile)
	require.NoError(t, err)

	expectedPID := fmt.Sprintf("%d", cmd.Process.Pid)
	require.Equal(t, expectedPID, string(pidData))
}
Loading