Skip to content

Commit ad345bd

Browse files
tcm: add a command to start TCM
@TarantoolBot document Title: Add a command to start TCM `tt tcm start` OR `tt tcm start --path /path/to/tcm`: added the capability to run TCM in interactive mode. `tt tcm start --watchdog`: implemented Watchdog mode for automatic restarting of TCM upon unexpected termination. Closes #TNTP-1102 Closes #TNTP-1771
1 parent 5e7b6a9 commit ad345bd

File tree

8 files changed

+417
-0
lines changed

8 files changed

+417
-0
lines changed

CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- `tt pack `: added TCM file packaging.
13+
- `tt aeon connect`: add connection from the cluster config.
14+
- `tt aeon connect`: add connection from the `app:instance_name`.
15+
- `tt tcm start`: add the tcm command.
16+
- `tt tcm start` OR `tt tcm start --path /path/to/tcm`: added the capability to run TCM in interactive mode.
17+
- `tt tcm start --watchdog`: implemented Watchdog mode for automatic restarting of TCM upon unexpected termination.
18+
1219
### Changed
1320

1421
### Fixed

cli/cmd/root.go

+1
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ After that tt will be able to manage the application using 'replicaset_example'
195195
NewLogCmd(),
196196
NewEnableCmd(),
197197
NewAeonCmd(),
198+
NewTcmCmd(),
198199
)
199200
if err := injectCmds(rootCmd); err != nil {
200201
panic(err.Error())

cli/cmd/tcm.go

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package cmd
2+
3+
import (
4+
"errors"
5+
"os"
6+
"os/exec"
7+
"time"
8+
9+
"github.com/spf13/cobra"
10+
"github.com/tarantool/tt/cli/cmdcontext"
11+
"github.com/tarantool/tt/cli/modules"
12+
tcmCmd "github.com/tarantool/tt/cli/tcm"
13+
"github.com/tarantool/tt/cli/util"
14+
)
15+
16+
// tcmCtx holds the options of the `tt tcm` commands. Its fields are bound to
// the --path and --watchdog flags in newTcmStartCmd.
var tcmCtx = tcmCmd.TcmCtx{}
17+
18+
func newTcmStartCmd() *cobra.Command {
19+
var tcmCmd = &cobra.Command{
20+
Use: "start",
21+
Short: "Start tcm application",
22+
Long: `Start to the tcm.
23+
tt tcm start --watchdog
24+
tt tcm start --path`,
25+
Run: func(cmd *cobra.Command, args []string) {
26+
cmdCtx.CommandName = cmd.Name()
27+
err := modules.RunCmd(&cmdCtx, cmd.CommandPath(), &modulesInfo, internalStartTcm, args)
28+
util.HandleCmdErr(cmd, err)
29+
30+
},
31+
}
32+
tcmCmd.Flags().StringVar(&tcmCtx.Executable, "path", "", "the path to the tcm binary file")
33+
tcmCmd.Flags().BoolVar(&tcmCtx.Watchdog, "watchdog", false, "enables the watchdog")
34+
35+
return tcmCmd
36+
}
37+
38+
func NewTcmCmd() *cobra.Command {
39+
var tcmCmd = &cobra.Command{
40+
Use: "tcm",
41+
Short: "Manage tcm application",
42+
}
43+
tcmCmd.AddCommand(
44+
newTcmStartCmd(),
45+
)
46+
return tcmCmd
47+
}
48+
49+
func startTcmInteractive() error {
50+
tcmApp := exec.Command(tcmCtx.Executable)
51+
52+
tcmApp.Stdout = os.Stdout
53+
tcmApp.Stderr = os.Stderr
54+
55+
if err := tcmApp.Run(); err != nil {
56+
return err
57+
}
58+
59+
return nil
60+
}
61+
62+
func startTcmUnderWatchDog() error {
63+
wd, err := tcmCmd.NewWatchdog(5 * time.Second)
64+
if err != nil {
65+
return err
66+
}
67+
68+
if err := wd.Start(tcmCtx.Executable); err != nil {
69+
return err
70+
}
71+
72+
return nil
73+
}
74+
75+
func internalStartTcm(cmdCtx *cmdcontext.CmdCtx, args []string) error {
76+
if cmdCtx.Cli.TarantoolCli.Executable == "" {
77+
return errors.New("cannot start: tarantool binary is not found")
78+
}
79+
80+
if cmdCtx.Cli.TcmCli.Executable == "" {
81+
return errors.New("cannot start: tcm binary is not found")
82+
}
83+
84+
tcmCtx.Executable = cmdCtx.Cli.TcmCli.Executable
85+
86+
if !tcmCtx.Watchdog {
87+
if err := startTcmInteractive(); err != nil {
88+
return err
89+
}
90+
}
91+
92+
if err := startTcmUnderWatchDog(); err != nil {
93+
return err
94+
}
95+
96+
return nil
97+
}

cli/tcm/tcm.go

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package tcm
2+
3+
// TcmCtx carries the settings used to launch TCM.
type TcmCtx struct {
	// Executable is the path to the TCM binary to run.
	Executable string
	// Watchdog selects watchdog mode when true.
	Watchdog bool
}

cli/tcm/watchdog.go

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package tcm
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"log"
8+
"os"
9+
"os/exec"
10+
"os/signal"
11+
"path/filepath"
12+
"sync"
13+
"syscall"
14+
"time"
15+
)
16+
17+
// Watchdog supervises a single child process: it starts the process, waits
// for it to exit and starts it again until it is told to stop.
type Watchdog struct {
	// cmd is the most recently created command for the supervised process.
	cmd *exec.Cmd
	// restartTimeout is the pause between an exit of the process and the
	// next start attempt.
	restartTimeout time.Duration
	// shouldStop is set once the Watchdog has been asked to stop.
	shouldStop bool
	// stopMutex guards shouldStop.
	stopMutex sync.Mutex
	// doneBarrier lets Stop wait until a running Start has returned.
	doneBarrier sync.WaitGroup
	// pidFile is the path of the file that receives the process PID.
	pidFile string
}
32+
33+
// NewWatchdog creates a new Watchdog instance.
34+
func NewWatchdog(restartTimeout time.Duration) (*Watchdog, error) {
35+
return &Watchdog{
36+
restartTimeout: restartTimeout,
37+
pidFile: "tcm/pidFile.pid",
38+
}, nil
39+
}
40+
41+
// Start starts the process and monitors its execution.
42+
func (wd *Watchdog) Start(bin string, args ...string) error {
43+
wd.doneBarrier.Add(1)
44+
defer wd.doneBarrier.Done()
45+
46+
signalCtx, signalCancel := context.WithCancel(context.Background())
47+
defer signalCancel()
48+
49+
go wd.handleSignals(signalCtx, signalCancel)
50+
51+
for {
52+
wd.stopMutex.Lock()
53+
if wd.shouldStop {
54+
wd.stopMutex.Unlock()
55+
return nil
56+
}
57+
wd.stopMutex.Unlock()
58+
59+
wd.cmd = exec.Command(bin, args...)
60+
wd.cmd.Stdout = os.Stdout
61+
wd.cmd.Stderr = os.Stderr
62+
63+
log.Println("(INFO): Starting process...")
64+
if err := wd.cmd.Start(); err != nil {
65+
log.Printf("(ERROR): Failed to start process: %v\n", err)
66+
return err
67+
}
68+
69+
if err := wd.writePIDToFile(); err != nil {
70+
log.Printf("(ERROR): Failed to write PID to file: %v\n", err)
71+
return err
72+
}
73+
74+
err := wd.cmd.Wait()
75+
if err != nil {
76+
var exitErr *exec.ExitError
77+
if errors.As(err, &exitErr) {
78+
log.Printf("(WARN): Process exited with error: %v\n", exitErr)
79+
} else {
80+
log.Printf("(ERROR): Process failed: %v\n", err)
81+
return err
82+
}
83+
} else {
84+
log.Println("(INFO): Process completed successfully.")
85+
}
86+
87+
wd.stopMutex.Lock()
88+
if wd.shouldStop {
89+
wd.stopMutex.Unlock()
90+
return nil
91+
}
92+
wd.stopMutex.Unlock()
93+
94+
log.Printf("(INFO): Waiting for %s before restart...\n", wd.restartTimeout)
95+
time.Sleep(wd.restartTimeout)
96+
}
97+
}
98+
99+
// Stop stops the process and shuts down the Watchdog.
100+
func (wd *Watchdog) Stop() {
101+
wd.stopMutex.Lock()
102+
wd.shouldStop = true
103+
if wd.cmd != nil && wd.cmd.Process != nil {
104+
log.Println("(INFO): Stopping process...")
105+
if err := wd.cmd.Process.Signal(syscall.SIGTERM); err != nil {
106+
log.Printf("(ERROR): Failed to stop process: %v\n", err)
107+
}
108+
}
109+
wd.stopMutex.Unlock()
110+
111+
wd.doneBarrier.Wait()
112+
os.RemoveAll(filepath.Dir(wd.pidFile))
113+
log.Println("(INFO): Watchdog stopped.")
114+
}
115+
116+
// handleSignals listens for OS signals and stops the Watchdog gracefully.
117+
func (wd *Watchdog) handleSignals(ctx context.Context, cancel context.CancelFunc) {
118+
signalChan := make(chan os.Signal, 1)
119+
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
120+
121+
select {
122+
case <-signalChan:
123+
log.Println("(INFO): Received stop signal.")
124+
wd.Stop()
125+
cancel()
126+
case <-ctx.Done():
127+
return
128+
}
129+
}
130+
131+
// writePIDToFile writes the PID of the process to a file.
132+
func (wd *Watchdog) writePIDToFile() error {
133+
if wd.cmd == nil || wd.cmd.Process == nil {
134+
return errors.New("process is not running")
135+
}
136+
137+
pid := wd.cmd.Process.Pid
138+
pidData := fmt.Sprintf("%d", pid)
139+
140+
dir := filepath.Dir(wd.pidFile)
141+
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
142+
return err
143+
}
144+
145+
file, err := os.Create(wd.pidFile)
146+
if err != nil {
147+
return fmt.Errorf("failed to create PID file: %v", err)
148+
}
149+
defer file.Close()
150+
151+
_, err = file.WriteString(pidData)
152+
if err != nil {
153+
return err
154+
}
155+
156+
log.Printf("(INFO): PID %d written to %s\n", pid, wd.pidFile)
157+
return nil
158+
}

cli/tcm/watchdog_test.go

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package tcm
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"os/exec"
7+
"testing"
8+
"time"
9+
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestWatchdogStartProcess(t *testing.T) {
14+
watchdog, err := NewWatchdog(1 * time.Second)
15+
require.NoError(t, err)
16+
17+
go func() {
18+
watchdog.Start("sleep", "5")
19+
require.NoError(t, err)
20+
}()
21+
22+
time.Sleep(2 * time.Second)
23+
24+
_, err = os.Stat(watchdog.pidFile)
25+
require.NoError(t, err)
26+
27+
watchdog.Stop()
28+
}
29+
30+
func TestWatchdogRestartProcess(t *testing.T) {
31+
watchdog, err := NewWatchdog(1 * time.Second)
32+
require.NoError(t, err)
33+
34+
go func() {
35+
err := watchdog.Start("sleep", "1")
36+
require.NoError(t, err)
37+
}()
38+
39+
time.Sleep(3 * time.Second)
40+
41+
_, err = os.Stat(watchdog.pidFile)
42+
require.NoError(t, err)
43+
44+
watchdog.Stop()
45+
}
46+
47+
func TestWritePIDToFile(t *testing.T) {
48+
pidFile := "/tmp/watchdog_test.pid"
49+
defer os.Remove(pidFile)
50+
51+
cmd := exec.Command("sleep", "1")
52+
err := cmd.Start()
53+
require.NoError(t, err)
54+
defer cmd.Process.Kill()
55+
56+
watchdog := &Watchdog{
57+
cmd: cmd,
58+
pidFile: pidFile,
59+
}
60+
61+
err = watchdog.writePIDToFile()
62+
require.NoError(t, err)
63+
64+
pidData, err := os.ReadFile(pidFile)
65+
require.NoError(t, err)
66+
67+
expectedPID := fmt.Sprintf("%d", cmd.Process.Pid)
68+
require.Equal(t, expectedPID, string(pidData))
69+
}

tcm/pidFile.pid

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
44947

0 commit comments

Comments
 (0)