Skip to content

Commit

Permalink
kernel: start RLIMIT_CPU timers in NewThreadGroup
Browse files Browse the repository at this point in the history
Before cl/695198313, this bug only affected RLIMIT_CPU soft limits, which were
represented by tg.rlimitCPUSoftSetting and was similarly uninitialized by
Kernel.NewThreadGroup(); the CPU clock ticker fetched RLIMIT_CPU hard limits in
each tick. After cl/695198313, this bug affects both RLIMIT_CPU soft and hard
limits.

Itimers don't have the same issue since they're not preserved across fork().

PiperOrigin-RevId: 695936410
  • Loading branch information
nixprime authored and gvisor-bot committed Nov 13, 2024
1 parent 45b346e commit 94aa652
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 11 deletions.
20 changes: 12 additions & 8 deletions pkg/sentry/kernel/task_acct.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,22 @@ func (t *Task) Setitimer(id int32, newitv linux.ItimerVal) (linux.ItimerVal, err
//
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) NotifyRlimitCPUUpdated() {
// Lock t.tg.timerMu to synchronize updates to these timers between tasks
// in t.tg.
t.tg.timerMu.Lock()
defer t.tg.timerMu.Unlock()
rlimitCPU := t.tg.limits.Get(limits.CPU)
t.tg.appSysCPUClockLast.Store(t)
t.tg.rlimitCPUSoftTimer.Set(ktime.Setting{
t.tg.notifyRlimitCPUUpdated(t)
}

func (tg *ThreadGroup) notifyRlimitCPUUpdated(t *Task) {
// Lock tg.timerMu to synchronize updates to these timers between tasks in
// tg.
tg.timerMu.Lock()
defer tg.timerMu.Unlock()
rlimitCPU := tg.limits.Get(limits.CPU)
tg.appSysCPUClockLast.Store(t)
tg.rlimitCPUSoftTimer.Set(ktime.Setting{
Enabled: rlimitCPU.Cur != limits.Infinity,
Next: ktime.FromSeconds(int64(min(rlimitCPU.Cur, math.MaxInt64))),
Period: time.Second,
}, nil)
t.tg.rlimitCPUHardTimer.Set(ktime.Setting{
tg.rlimitCPUHardTimer.Set(ktime.Setting{
Enabled: rlimitCPU.Max != limits.Infinity,
Next: ktime.FromSeconds(int64(min(rlimitCPU.Max, math.MaxInt64))),
}, nil)
Expand Down
1 change: 1 addition & 0 deletions pkg/sentry/kernel/thread_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ func (k *Kernel) NewThreadGroup(pidns *PIDNamespace, sh *SignalHandlers, termina
tg.rlimitCPUSoftListener.tg = tg
tg.rlimitCPUHardTimer.Init(&tg.appSysCPUClock, &tg.rlimitCPUHardListener)
tg.rlimitCPUHardListener.tg = tg
tg.notifyRlimitCPUUpdated(nil)
tg.oldRSeqCritical.Store(&OldRSeqCriticalRegion{})
return tg
}
Expand Down
81 changes: 78 additions & 3 deletions test/syscalls/linux/timers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
// limitations under the License.

#include <errno.h>
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <syscall.h>
Expand Down Expand Up @@ -50,13 +52,15 @@ namespace {
#define CPUCLOCK_PROF 0
#endif // CPUCLOCK_PROF

PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
clockid_t ProcessCPUClock(pid_t pid) {
// Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce
// RLIMIT_CPU.
clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
return (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
}

PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
struct timespec ts;
int ret = clock_gettime(clockid, &ts);
int ret = clock_gettime(ProcessCPUClock(pid), &ts);
if (ret < 0) {
return PosixError(errno, "clock_gettime failed");
}
Expand Down Expand Up @@ -223,6 +227,77 @@ TEST(TimerTest, ProcessKilledOnCPUHardLimit) {
EXPECT_GE(cpu, kHardLimit);
}

TEST(TimerTest, RlimitCpuInheritedAcrossFork) {
pid_t child_pid = fork();
MaybeSave();
if (child_pid == 0) {
// Ignore SIGXCPU from the RLIMIT_CPU soft limit.
struct sigaction new_action;
new_action.sa_handler = NoopSignalHandler;
new_action.sa_flags = 0;
sigemptyset(&new_action.sa_mask);
TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);

// Set both soft and hard limits to expire a short time from now. (Since we
// may not be able to raise RLIMIT_CPU again, this must happen in a
// disposable child of the test process.)
constexpr int kDelaySeconds = 2;
struct timespec ts;
TEST_PCHECK(clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0);
struct rlimit cpu_limits;
// +1 to round up, presuming that ts.tv_nsec > 0.
cpu_limits.rlim_cur = cpu_limits.rlim_max = ts.tv_sec + kDelaySeconds + 1;
TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
MaybeSave();

pid_t grandchild_pid = fork();
MaybeSave();
if (grandchild_pid == 0) {
int pipefd[2];
TEST_PCHECK(pipe(pipefd) == 0);
struct pollfd pfd;
pfd.fd = pipefd[0];
pfd.events = POLLIN;
struct timespec timeout;
timeout.tv_sec = 0;
timeout.tv_nsec = 1000;

// Burn CPU.
uint64_t x = 0;
for (;;) {
x++;
benchmark::DoNotOptimize(x); // Don't optimize this loop away.
// Periodically block to ensure that child_pid gets a chance to run and
// block in waitid().
// TODO: b/315388929 - remove this
if (x % 16384 == 0) {
TEST_PCHECK(ppoll(&pfd, 1, &timeout, nullptr) == 0);
}
}
}
TEST_PCHECK(grandchild_pid > 0);

// Wait for the grandchild to exit, but do not reap it. This will allow us
// to check its CPU usage while it is zombied.
TEST_PCHECK(waitid(P_PID, grandchild_pid, nullptr, WEXITED | WNOWAIT) == 0);
MaybeSave();
TEST_PCHECK(clock_gettime(ProcessCPUClock(grandchild_pid), &ts) == 0);
TEST_CHECK(ts.tv_sec >= static_cast<long>(cpu_limits.rlim_max));
// Reap the grandchild and check that it was SIGKILLed by the RLIMIT_CPU
// hard limit.
int status;
TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
TEST_CHECK(WIFSIGNALED(status) && (WTERMSIG(status) == SIGKILL));
_exit(0);
}

int status;
ASSERT_THAT(waitpid(child_pid, &status, 0),
SyscallSucceedsWithValue(child_pid));
EXPECT_TRUE(WIFEXITED(status) && (WEXITSTATUS(status) == 0))
<< "status = " << status;
}

// See timerfd.cc:TimerSlack() for rationale.
constexpr absl::Duration kTimerSlack = absl::Milliseconds(500);

Expand Down

0 comments on commit 94aa652

Please sign in to comment.