diff --git a/internal/codegraph/codegraph.go b/internal/codegraph/codegraph.go index 04e5cf93d..7968a5302 100644 --- a/internal/codegraph/codegraph.go +++ b/internal/codegraph/codegraph.go @@ -138,6 +138,7 @@ func EnsureInit(ctx context.Context, bin, root string) error { ctx, cancel := context.WithTimeout(ctx, initTimeout) defer cancel() cmd := exec.CommandContext(ctx, bin, "init", root) + proc.SetProcessGroupKill(cmd) // own group so Cancel→KillTree reaps the tree off Windows (no-op on Windows) cmd.Cancel = func() error { proc.KillTree(cmd); return nil } cmd.WaitDelay = 3 * time.Second proc.HideWindow(cmd) diff --git a/internal/proc/kill_other.go b/internal/proc/kill_other.go index aa5ad08ab..a18272eb3 100644 --- a/internal/proc/kill_other.go +++ b/internal/proc/kill_other.go @@ -2,20 +2,42 @@ package proc -import "os/exec" +import ( + "os/exec" + "syscall" +) -// KillTree terminates cmd's process; off Windows it kills the direct child only. +// KillTree kills cmd's whole process group. StartTracked (and SetProcessGroupKill +// for children started outside it) put the child in its own group via Setpgid, so +// the negative-pid signal reaches every descendant — a launcher whose sub-daemon +// survives the parent (e.g. codegraph's bundled node runtime) included, where a +// plain Process.Kill would only hit the direct child and orphan the grandchild. func KillTree(cmd *exec.Cmd) { if cmd == nil || cmd.Process == nil { return } - _ = cmd.Process.Kill() + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil { + _ = cmd.Process.Kill() // not a group leader (Setpgid wasn't set) — at least kill the child + } +} + +// SetProcessGroupKill makes cmd start in its own process group so KillTree can +// reap its whole tree. Use it for children started outside StartTracked (e.g. a +// one-shot CombinedOutput). It is a no-op on Windows, where the Job Object that +// TrackTree/StartTracked assigns handles the tree instead. +func SetProcessGroupKill(cmd *exec.Cmd) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setpgid = true } -// StartTracked starts cmd. Off Windows there is no Job Object to track it in — -// the platform reaps the child directly — so it just starts and returns a 0 -// handle, and KillTracked falls back to KillTree. +// StartTracked starts cmd in its own process group so KillTracked / KillTree can +// reap its whole tree. Off Windows the process group is the equivalent of the +// Windows Job Object; it returns a 0 handle, and KillTracked falls back to +// KillTree. func StartTracked(cmd *exec.Cmd) (uintptr, error) { + SetProcessGroupKill(cmd) return 0, cmd.Start() } diff --git a/internal/proc/kill_other_test.go b/internal/proc/kill_other_test.go index cdacbcfd2..b13fe9989 100644 --- a/internal/proc/kill_other_test.go +++ b/internal/proc/kill_other_test.go @@ -3,7 +3,12 @@ package proc import ( + "bufio" + "errors" "os/exec" + "strconv" + "strings" + "syscall" "testing" "time" ) @@ -45,3 +50,40 @@ func TestKillTrackedTerminatesChild(t *testing.T) { t.Fatal("cmd.Wait blocked after KillTracked") } } + +// A launcher (sh) that backgrounds a grandchild (sleep) and stays alive: with +// the child in its own process group, KillTracked's negative-pid kill must reap +// the grandchild too, not just sh — the codegraph node-daemon leak this guards. +func TestKillTrackedReapsProcessGroupGrandchild(t *testing.T) { + cmd := exec.Command("sh", "-c", "sleep 60 & echo $!; wait") + stdout, err := cmd.StdoutPipe() + if err != nil { + t.Fatalf("StdoutPipe: %v", err) + } + if _, err := StartTracked(cmd); err != nil { + t.Fatalf("StartTracked: %v", err) + } + + line, err := bufio.NewReader(stdout).ReadString('\n') + if err != nil { + t.Fatalf("read grandchild pid: %v", err) + } + gcPid, err := strconv.Atoi(strings.TrimSpace(line)) + if err != nil || gcPid <= 0 { + t.Fatalf("bad grandchild pid %q: %v", line, err) + } + if syscall.Kill(gcPid, 0) != nil { + t.Fatalf("grandchild %d not alive before kill", gcPid) + } + + KillTracked(cmd, 0) + _ = cmd.Wait() + + deadline := time.Now().Add(5 * time.Second) + for !errors.Is(syscall.Kill(gcPid, 0), syscall.ESRCH) { + if time.Now().After(deadline) { + t.Fatalf("grandchild %d survived KillTracked — process group not reaped", gcPid) + } + time.Sleep(20 * time.Millisecond) + } +} diff --git a/internal/proc/kill_windows.go b/internal/proc/kill_windows.go index f139d7308..cdbb6ab2b 100644 --- a/internal/proc/kill_windows.go +++ b/internal/proc/kill_windows.go @@ -11,6 +11,12 @@ import ( "golang.org/x/sys/windows" ) +// SetProcessGroupKill is a no-op on Windows: the Job Object that StartTracked +// assigns reaps the whole tree on close, so Setpgid (which doesn't exist here) +// is unnecessary. It exists so non-Windows callers can request group kill +// uniformly. +func SetProcessGroupKill(*exec.Cmd) {} + // KillTree terminates cmd and every descendant it spawned. Process.Kill only // signals the direct child, so a launcher (cmd.exe → node.exe) leaves the // grandchild alive holding the inherited stdout/stderr pipes — which makes