Skip to content

Commit 0194ed9

Browse files
tranji-cloud and gvisor-bot
authored and committed
kvm: RSEQ: Add RSEQ support
This change adds RSEQ support for platform/kvm. To support this, the KVM platform must provide two key capabilities: (1) A stable, unique CPU ID. (2) A way to detect when a thread has been preempted. This implementation provides the necessary support as follows: CPU ID: - platform/kvm now advertises the KVM vCPU ID as the cpu_id. Preemption Detection: - Compares the last context the CPU ran against the current context being scheduled. - Compares the context's rseqCPU and the CPU ID retrieved by the platform. To facilitate this, several new methods are introduced to the platform interface and implemented by platform/kvm: - HasCPUNumbers() - NumCPUs() - DetectsCPUPreemption() - PreemptCPU() - PreemptAllCPUs() PiperOrigin-RevId: 823228013
1 parent cd0901c commit 0194ed9

File tree

14 files changed

+172
-11
lines changed

14 files changed

+172
-11
lines changed

pkg/sentry/kernel/kernel.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ func (k *Kernel) Init(args InitKernelArgs) error {
490490
k.cpuClockTickerWakeCh = make(chan struct{}, 1)
491491
k.cpuClockTickerStopCond.L = &k.runningTasksMu
492492
k.applicationCores = args.ApplicationCores
493+
if args.UseHostCores && k.HasCPUNumbers() {
494+
args.UseHostCores = false
495+
log.Infof("UseHostCores enabled but the platform implements HasCPUNumbers(): setting UseHostCores to false")
496+
}
497+
493498
if args.UseHostCores {
494499
k.useHostCores = true
495500
maxCPU, err := hostcpu.MaxPossibleCPU()
@@ -502,6 +507,15 @@ func (k *Kernel) Init(args InitKernelArgs) error {
502507
k.applicationCores = minAppCores
503508
}
504509
}
510+
511+
if k.HasCPUNumbers() {
512+
if k.applicationCores < uint(k.NumCPUs()) {
513+
log.Infof("ApplicationCores is less than NumCPUs: %d < %d", k.applicationCores, k.NumCPUs())
514+
log.Infof("Setting applicationCores to NumCPUs: %d", k.NumCPUs())
515+
k.applicationCores = uint(k.NumCPUs())
516+
}
517+
}
518+
505519
k.extraAuxv = args.ExtraAuxv
506520
k.vdso = args.Vdso
507521
k.vdsoParams = args.VdsoParams

pkg/sentry/kernel/rseq.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"gvisor.dev/gvisor/pkg/abi/linux"
2121
"gvisor.dev/gvisor/pkg/errors/linuxerr"
2222
"gvisor.dev/gvisor/pkg/hostarch"
23-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2423
"gvisor.dev/gvisor/pkg/usermem"
2524
)
2625

@@ -50,7 +49,7 @@ type OldRSeqCriticalRegion struct {
5049

5150
// RSeqAvailable returns true if t supports (old and new) restartable sequences.
5251
func (t *Task) RSeqAvailable() bool {
53-
return t.k.useHostCores && t.k.Platform.DetectsCPUPreemption()
52+
return (t.k.useHostCores || t.k.Platform.HasCPUNumbers()) && t.k.Platform.DetectsCPUPreemption()
5453
}
5554

5655
// SetRSeq registers addr as this thread's rseq structure.
@@ -201,7 +200,7 @@ func (t *Task) rseqUpdateCPU() error {
201200
return nil
202201
}
203202

204-
t.rseqCPU = int32(hostcpu.GetCPU())
203+
t.rseqCPU = t.CPU()
205204

206205
// Update both CPUs, even if one fails.
207206
rerr := t.rseqCopyOutCPU()

pkg/sentry/kernel/task_run.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"gvisor.dev/gvisor/pkg/goid"
2525
"gvisor.dev/gvisor/pkg/hostarch"
2626
"gvisor.dev/gvisor/pkg/refs"
27-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2827
"gvisor.dev/gvisor/pkg/sentry/ktime"
2928
"gvisor.dev/gvisor/pkg/sentry/memmap"
3029
"gvisor.dev/gvisor/pkg/sentry/platform"
@@ -207,7 +206,7 @@ func (app *runApp) execute(t *Task) taskRunState {
207206
if t.rseqPreempted {
208207
t.rseqPreempted = false
209208
if t.rseqAddr != 0 || t.oldRSeqCPUAddr != 0 {
210-
t.rseqCPU = int32(hostcpu.GetCPU())
209+
t.rseqCPU = t.CPU()
211210
if err := t.rseqCopyOutCPU(); err != nil {
212211
t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err)
213212
t.forceSignal(linux.SIGSEGV, false)

pkg/sentry/kernel/task_sched.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
365365
return linuxerr.EINVAL
366366
}
367367

368-
if t.k.useHostCores {
368+
if t.k.useHostCores || t.k.Platform.HasCPUNumbers() {
369369
// No-op; pretend the mask was immediately changed back.
370370
return nil
371371
}
@@ -383,6 +383,10 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
383383

384384
// CPU returns the cpu id for a given task.
385385
func (t *Task) CPU() int32 {
386+
if t.k.Platform.HasCPUNumbers() {
387+
return t.p.LastCPUNumber()
388+
}
389+
386390
if t.k.useHostCores {
387391
return int32(hostcpu.GetCPU())
388392
}

pkg/sentry/platform/kvm/context.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package kvm
1616

1717
import (
1818
"gvisor.dev/gvisor/pkg/abi/linux"
19+
"gvisor.dev/gvisor/pkg/atomicbitops"
1920
pkgcontext "gvisor.dev/gvisor/pkg/context"
2021
"gvisor.dev/gvisor/pkg/hostarch"
2122
"gvisor.dev/gvisor/pkg/ring0"
@@ -36,6 +37,9 @@ type platformContext struct {
3637

3738
// interrupt is the interrupt platformContext.
3839
interrupt interrupt.Forwarder
40+
41+
// lastUsedCPU is the last CPU ID used by this platformContext.
42+
lastUsedCPU atomicbitops.Int32
3943
}
4044

4145
// tryCPUIDError indicates that CPUID emulation should occur.
@@ -45,7 +49,7 @@ type tryCPUIDError struct{}
4549
func (tryCPUIDError) Error() string { return "cpuid emulation failed" }
4650

4751
// Switch runs the provided platformContext in the given address space.
48-
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, _ int32) (*linux.SignalInfo, hostarch.AccessType, error) {
52+
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, rseqCPU int32) (*linux.SignalInfo, hostarch.AccessType, error) {
4953
as := mm.AddressSpace()
5054
localAS := as.(*addressSpace)
5155

@@ -58,6 +62,20 @@ restart:
5862
c.machine.Put(cpu) // Already preempted.
5963
return nil, hostarch.NoAccess, platform.ErrContextInterrupt
6064
}
65+
// If this CPU was last used to run a different context
66+
// or if this context last ran on a different CPU, then we've
67+
// been preempted.
68+
last := cpu.lastCtx.Swap(c)
69+
c.lastUsedCPU.Store(int32(cpu.id))
70+
preempted := rseqCPU >= 0 && (last != c || rseqCPU != int32(cpu.id))
71+
if preempted {
72+
// Release resources.
73+
c.machine.Put(cpu)
74+
75+
// All done.
76+
c.interrupt.Disable()
77+
return nil, hostarch.NoAccess, platform.ErrContextCPUPreempted
78+
}
6179

6280
// Set the active address space.
6381
//
@@ -136,3 +154,8 @@ func (c *platformContext) PullFullState(as platform.AddressSpace, ac *arch.Conte
136154

137155
// PrepareSleep implements platform.Context.PrepareSleep.
138156
func (*platformContext) PrepareSleep() {}
157+
158+
// LastCPUNumber implements platform.Context.LastCPUNumber.
159+
func (c *platformContext) LastCPUNumber() int32 {
160+
return c.lastUsedCPU.Load()
161+
}

pkg/sentry/platform/kvm/kvm.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ type runData struct {
6262

6363
// KVM represents a lightweight VM context.
6464
type KVM struct {
65-
platform.NoCPUPreemptionDetection
66-
6765
// KVM never changes mm_structs.
6866
platform.UseHostProcessMemoryBarrier
6967

@@ -180,6 +178,38 @@ func (k *KVM) ConcurrencyCount() int {
180178
return k.machine.maxVCPUs
181179
}
182180

181+
// HasCPUNumbers implements platform.Platform.HasCPUNumbers.
182+
func (*KVM) HasCPUNumbers() bool {
183+
return true
184+
}
185+
186+
// NumCPUs implements platform.Platform.NumCPUs.
187+
func (k *KVM) NumCPUs() int32 {
188+
return int32(k.machine.maxVCPUs)
189+
}
190+
191+
// DetectsCPUPreemption implements platform.Platform.DetectsCPUPreemption.
192+
func (k *KVM) DetectsCPUPreemption() bool {
193+
return true
194+
}
195+
196+
// PreemptAllCPUs implements platform.Platform.PreemptAllCPUs.
197+
func (k *KVM) PreemptAllCPUs() error {
198+
for _, c := range k.machine.vCPUsByID {
199+
c.lastCtx.Store(nil)
200+
c.BounceToHost()
201+
}
202+
return nil
203+
}
204+
205+
// PreemptCPU implements platform.Platform.PreemptCPU.
206+
func (k *KVM) PreemptCPU(cpu int32) error {
207+
c := k.machine.vCPUsByID[cpu]
208+
c.lastCtx.Store(nil)
209+
c.BounceToHost()
210+
return nil
211+
}
212+
183213
// NewContext returns an interruptible context.
184214
func (k *KVM) NewContext(pkgcontext.Context) platform.Context {
185215
return &platformContext{

pkg/sentry/platform/kvm/machine.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ type vCPU struct {
216216

217217
// dieState holds state related to vCPU death.
218218
dieState dieState
219+
220+
// lastCtx is the last context that was scheduled on this vCPU.
221+
lastCtx atomic.Pointer[platformContext]
219222
}
220223

221224
type dieState struct {

pkg/sentry/platform/platform.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ type Platform interface {
9696
// NewContext returns a new execution context.
9797
NewContext(context.Context) Context
9898

99+
// PreemptCPU causes all concurrent calls to Context.Switch() on the given CPU, as well
100+
// as the first following call to Context.Switch() for each Context, to
101+
// return ErrContextCPUPreempted.
102+
//
103+
// Precondition(s): cpu must be in the range [0, NumCPUs()).
104+
//
105+
// PreemptCPU is only supported if DetectsCPUPreemption() && HasCPUNumbers() == true.
106+
// Platforms for which this does not hold may panic if PreemptCPU is
107+
// called.
108+
PreemptCPU(cpu int32) error
109+
99110
// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
100111
// as the first following call to Context.Switch() for each Context, to
101112
// return ErrContextCPUPreempted.
@@ -121,6 +132,12 @@ type Platform interface {
121132
// in parallel. Concurrent calls to Context.Switch() beyond
122133
// ConcurrencyCount() may block until previous calls have returned.
123134
ConcurrencyCount() int
135+
136+
// HasCPUNumbers returns true if platform assigns CPU numbers to contexts.
137+
HasCPUNumbers() bool
138+
139+
// NumCPUs returns the number of CPUs on the platform.
140+
NumCPUs() int32
124141
}
125142

126143
// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
@@ -137,6 +154,25 @@ func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
137154
panic("This platform does not support CPU preemption detection")
138155
}
139156

157+
// NoCPUNumbers implements Platform.HasCPUNumbers for platforms that do
158+
// not support it.
159+
type NoCPUNumbers struct{}
160+
161+
// HasCPUNumbers implements Platform.HasCPUNumbers.
162+
func (NoCPUNumbers) HasCPUNumbers() bool {
163+
return false
164+
}
165+
166+
// NumCPUs implements Platform.NumCPUs.
167+
func (NoCPUNumbers) NumCPUs() int32 {
168+
panic("platform does not support CPU numbers")
169+
}
170+
171+
// PreemptCPU implements Platform.PreemptCPU.
172+
func (NoCPUNumbers) PreemptCPU(cpu int32) error {
173+
panic("platform does not support preempting a specific CPU")
174+
}
175+
140176
// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
141177
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
142178
// host.
@@ -264,6 +300,16 @@ type Context interface {
264300
// PrepareSleep() is called when the thread switches to the
265301
// interruptible sleep state.
266302
PrepareSleep()
303+
304+
// LastCPUNumber returns the last CPU number that this context was running on.
305+
// If the context never ran on a CPU, it may return any valid CPU number, as long as the first
306+
// call to Switch will detect that the CPU number is incorrect and return ErrContextCPUPreempted.
307+
LastCPUNumber() int32
308+
}
309+
310+
// LastCPUNumber implements Context.LastCPUNumber.
311+
func (NoCPUNumbers) LastCPUNumber() int32 {
312+
panic("context does not support last CPU number")
267313
}
268314

269315
// ContextError is one of the possible errors returned by Context.Switch().

pkg/sentry/platform/ptrace/ptrace.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ var (
7474

7575
type context struct {
7676
archContext
77+
platform.NoCPUNumbers
7778

7879
// signalInfo is the signal info, if and when a signal is received.
7980
signalInfo linux.SignalInfo
@@ -214,6 +215,7 @@ type PTrace struct {
214215
platform.MMapMinAddr
215216
platform.NoCPUPreemptionDetection
216217
platform.UseHostGlobalMemoryBarrier
218+
platform.NoCPUNumbers
217219
}
218220

219221
// New returns a new ptrace-based implementation of the platform interface.

pkg/sentry/platform/systrap/systrap.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ var (
125125

126126
// platformContext is an implementation of the platform context.
127127
type platformContext struct {
128+
platform.NoCPUNumbers
129+
128130
// signalInfo is the signal info, if and when a signal is received.
129131
signalInfo linux.SignalInfo
130132

@@ -239,6 +241,7 @@ func (c *platformContext) PrepareSleep() {
239241
type Systrap struct {
240242
platform.NoCPUPreemptionDetection
241243
platform.UseHostGlobalMemoryBarrier
244+
platform.NoCPUNumbers
242245

243246
// memoryFile is used to create a stub sysmsg stack which is shared with
244247
// the Sentry. Since memoryFile is platform-private, it is never restored,

0 commit comments

Comments
 (0)