
Commit 642d058

runtime: add runtime.Yield

1 parent ec86954 commit 642d058

2 files changed: +159 -0 lines changed

src/runtime/proc.go

Lines changed: 144 additions & 0 deletions
@@ -353,6 +353,106 @@ func Gosched() {
 	mcall(gosched_m)
 }
 
+// Yield cooperatively yields if, and only if, the scheduler is "busy".
+//
+// This can be called by any work wishing to utilize strictly spare capacity
+// while minimizing the degree to which it delays other work from being
+// promptly scheduled.
+//
+// Yield is intended to be very low overhead, particularly in the no-op case
+// where the scheduler is not at capacity, to ensure it can be called often
+// enough in tasks wishing to yield promptly to waiting work when needed.
+// When the scheduler is busy, the yielded goroutine can be parked in a
+// waiting state until the scheduler has idle capacity again to resume it.
+//
+// A goroutine calling Yield may be parked in a yielded state for an arbitrary
+// amount of time as long as the scheduler remains busy; callers should consider
+// this when deciding where to yield and where not to, for example avoiding
+// yield points at which locks contended by other work might be held.
+//
+// Yield will never park if the calling goroutine is locked to an OS thread.
+//
+//go:nosplit
+func Yield() {
+	// Common/fast case: do nothing if ngqueued is zero. Doing only this check here
+	// and leaving more detailed decisions to yield_slow keeps this wrapper
+	// inlineable (complexity cost as of writing is 70 out of the allowed 80).
+	if sched.ngqueued.Load() != 0 {
+		yield_slow()
+	}
+}
+
+// yield_slow is intended to be called after a check of ngqueued suggests that
+// yielding would be appreciated, to determine how to actually yield (yield to
+// the P's local runq vs park in the yieldq). It is split out to ensure Yield()
+// and its cheap check of ngqueued remain inlineable.
+//
+// If there is work on the local runq, the cheapest option is to just hop behind
+// it in the local runq to let it run and then pick back up. However, this will
+// end up thrashing if the work we yield to also then yields right back. We
+// don't mark goroutines in any way when they yield, so we cannot directly detect
+// whether the next goroutine in our local runq got there via a yield/will yield
+// back, so we use a heuristic: if we ran for <100us, it is possible we are
+// thrashing, so we park in the yieldq instead to let the remaining local runq
+// work drain.
+//
+// If there is no work in the local and global run queues but ngqueued got us
+// here, it is likely there is work on a different P's local queue: we could
+// immediately park in the yieldq to free this P to go try to steal, but we
+// would prefer that the work currently running on that P yield to it (or
+// finish/block/be preempted) instead of parking this work, stealing that work,
+// and then unparking this work again.
+//
+// At the same time, we *do* want to yield -- that's why we are here -- if there
+// is work waiting for a chance to run. We can balance our preference to give
+// the other P a chance to just run it vs not making it wait too long with a
+// heuristic: an ideal one might use how long that work has been waiting (either
+// by changing ngqueued to be a time or by locally remembering when/how many times
+// we see it non-zero), but a simple rule that uses the existing fields for now
+// is to go park if we have been running for 1ms: this bounds how long we defer
+// parking (to at most 1ms), and while we might park immediately if we were
+// already running >1ms before ngqueued was set, at least the fact we ran for 1ms
+// means the overhead of parking and unparking may be proportionally lower.
+//
+// If the global runq has work, we always park right away, as unlike the other-P
+// local runq case, there isn't a P we think is better suited to running it, so
+// we should just do it.
+func yield_slow() {
+	gp := getg()
+
+	running := nanotime() - gp.lastsched
+	if !runqempty(gp.m.p.ptr()) {
+		if running > 100_000 { // 100us
+			goyield()
+			return
+		}
+	} else if sched.runqsize == 0 && running < 1_000_000 { // 1ms
+		return
+	}
+
+	// Don't park while locked to an OS thread.
+	if gp.lockedm != 0 {
+		return
+	}
+
+	// Eagerly decrement ngqueued; we could leave it for findRunnable to reset it
+	// next time it finds no work, but there could be a thundering herd of yields
+	// in the meantime; we know that by parking we let this P go find _some_ work,
+	// so we can decrement it by one right away. This decrement does race with the
+	// reset in findRunnable, so if we notice it go negative, just reset it and
+	// skip the yield. Of course that too races with a concurrent increment, but
+	// that's fine -- it is an approximate signal anyway.
+	if sched.ngqueued.Add(-1) < 0 {
+		sched.ngqueued.Store(0)
+		return
+	}
+
+	checkTimeouts()
+
+	// traceskip=1 so stacks show runtime.Yield.
+	gopark(yield_put, nil, waitReasonYield, traceBlockPreempted, 1)
+}
+
 // goschedguarded yields the processor like gosched, but also checks
 // for forbidden states and opts out of the yield in those cases.
 //
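
As a usage illustration, here is a minimal sketch of how a caller might use the new API. It assumes a toolchain built with this change so that runtime.Yield exists; backgroundScan and its arguments are hypothetical names, not part of the commit.

package main

import "runtime"

// backgroundScan is a hypothetical low-priority task that only wants to use
// strictly spare scheduler capacity. It calls runtime.Yield between cheap
// units of work; each call is a no-op unless the scheduler reports queued
// work waiting for a P, in which case this goroutine steps aside.
func backgroundScan(items []int, process func(int)) {
	for _, it := range items {
		process(it)
		runtime.Yield() // cheap when idle; may park while the scheduler is busy
	}
}

func main() {
	backgroundScan([]int{1, 2, 3}, func(int) {})
}

Because the fast path is a single atomic load of ngqueued, the call is intended to be cheap enough to sit inside a tight inner loop, which is why the doc comment stresses it can be called often.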
@@ -3165,6 +3265,7 @@ func wakep() {
 	lock(&sched.lock)
 	pp, _ = pidlegetSpinning(0)
 	if pp == nil {
+		sched.ngqueued.Add(1)
 		if sched.nmspinning.Add(-1) < 0 {
 			throw("wakep: negative nmspinning")
 		}
@@ -3445,6 +3546,29 @@ top:
 		}
 	}
 
+	sched.ngqueued.Store(0)
+
+	// As a last resort before we give up the P, try yieldq.
+	if sched.yieldqsize != 0 {
+		lock(&sched.lock)
+		bg := sched.yieldq.pop()
+		if bg != nil {
+			sched.yieldqsize--
+		}
+		unlock(&sched.lock)
+		if bg != nil {
+			// Transition from _Gwaiting (yield) to _Grunnable.
+			trace := traceAcquire()
+			casgstatus(bg, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				// Match other ready paths for trace visibility.
+				trace.GoUnpark(bg, 0)
+				traceRelease(trace)
+			}
+			return bg, false, false
+		}
+	}
+
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
@@ -3509,6 +3633,10 @@ top:
 		unlock(&sched.lock)
 		return gp, false, false
 	}
+	if sched.yieldqsize != 0 {
+		unlock(&sched.lock)
+		goto top
+	}
 	if !mp.spinning && sched.needspinning.Load() == 1 {
 		// See "Delicate dance" comment below.
 		mp.becomeSpinning()
@@ -3666,6 +3794,7 @@ top:
 	unlock(&sched.lock)
 	if pp == nil {
 		injectglist(&list)
+		sched.ngqueued.Add(1)
 		netpollAdjustWaiters(delta)
 	} else {
 		acquirep(pp)
@@ -4889,6 +5018,7 @@ func exitsyscall0(gp *g) {
 	var locked bool
 	if pp == nil {
 		globrunqput(gp)
+		sched.ngqueued.Add(1)
 
 		// Below, we stoplockedm if gp is locked. globrunqput releases
 		// ownership of gp, so we must check if gp is locked prior to
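
At this point all three sides of the ngqueued protocol have appeared: the increments where runnable work fails to find an idle P (wakep, netpoll injection, and the exitsyscall0 path above), the reset in findRunnable once a P finds nothing left to run, and the clamped decrement in yield_slow. A self-contained analog of that counter protocol, written against sync/atomic rather than the runtime's internals (all names below are illustrative), might look like:

package main

import (
	"fmt"
	"sync/atomic"
)

// queued is the analog of sched.ngqueued: a rough count of work that was
// queued while no idle worker was available.
var queued atomic.Int32

// noteQueuedWithoutIdleWorker is the analog of the Add(1) sites.
func noteQueuedWithoutIdleWorker() { queued.Add(1) }

// idleWorkerFoundNothing is the analog of the Store(0) in findRunnable.
func idleWorkerFoundNothing() { queued.Store(0) }

// yielderShouldPark mirrors Yield/yield_slow: a cheap load on the fast path,
// then a racy decrement that is clamped back to zero if it went negative.
func yielderShouldPark() bool {
	if queued.Load() == 0 {
		return false // fast path: no sign of saturation
	}
	if queued.Add(-1) < 0 {
		queued.Store(0) // lost a race with a reset; skip the yield
		return false
	}
	return true
}

func main() {
	noteQueuedWithoutIdleWorker()
	fmt.Println(yielderShouldPark()) // true: someone is waiting, go park
	idleWorkerFoundNothing()
	fmt.Println(yielderShouldPark()) // false: the counter was reset
}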
@@ -7111,6 +7241,20 @@ func (q *gQueue) popList() gList {
 	return stack
 }
 
+// yield_put is the gopark unlock function for Yield. It enqueues the goroutine
+// onto the global yield queue. Returning true keeps the G parked until another
+// part of the scheduler makes it runnable again. The G remains in _Gwaiting
+// after this returns.
+//
+//go:nosplit
+func yield_put(gp *g, _ unsafe.Pointer) bool {
+	lock(&sched.lock)
+	sched.yieldq.pushBack(gp)
+	sched.yieldqsize++
+	unlock(&sched.lock)
+	return true
+}
+
 // A gList is a list of Gs linked through g.schedlink. A G can only be
 // on one gQueue or gList at a time.
 type gList struct {
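
The yield_put above and the yieldq pop in findRunnable amount to a simple handoff: the yielding goroutine enqueues itself under sched.lock, stays parked because the gopark unlock function returns true, and runs again only once an idle P pops it off the queue and flips it back to runnable. A rough user-space analog of that lifecycle, using a mutex-guarded slice and a wake channel in place of gopark and casgstatus (all names below are illustrative, not runtime API), might look like:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

// yielded is the analog of a parked G: it sleeps on wake until resumed.
type yielded struct{ wake chan struct{} }

var (
	mu     sync.Mutex
	yieldq []*yielded // analog of sched.yieldq / sched.yieldqsize
)

// park is the analog of yield_put followed by staying parked in gopark:
// enqueue under the lock, then block until someone pops us and wakes us.
func park() {
	y := &yielded{wake: make(chan struct{})}
	mu.Lock()
	yieldq = append(yieldq, y)
	mu.Unlock()
	<-y.wake
}

// resumeOne is the analog of the findRunnable pop: if anything has yielded,
// make exactly one of them runnable again and report success.
func resumeOne() bool {
	mu.Lock()
	defer mu.Unlock()
	if len(yieldq) == 0 {
		return false
	}
	y := yieldq[0]
	yieldq = yieldq[1:]
	close(y.wake)
	return true
}

func main() {
	done := make(chan struct{})
	go func() { park(); close(done) }()
	for !resumeOne() {
		runtime.Gosched() // wait for the goroutine to park itself first
	}
	<-done
	fmt.Println("yielded goroutine resumed")
}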

src/runtime/runtime2.go

Lines changed: 15 additions & 0 deletions
@@ -801,10 +801,23 @@ type schedt struct {
 	nmspinning   atomic.Int32  // See "Worker thread parking/unparking" comment in proc.go.
 	needspinning atomic.Uint32 // See "Delicate dance" comment in proc.go. Boolean. Must hold sched.lock to set to 1.
 
+	// ngqueued is a rough approximation of the number of goroutines waiting for
+	// scheduler capacity to run (incremented when an idle P is not found, e.g.
+	// during wakep). It is used to signal scheduler exhaustion for cooperative
+	// yield decisions; it does not need to be exact as long as it broadly
+	// captures saturation.
+	ngqueued atomic.Int32
+
 	// Global runnable queue.
 	runq     gQueue
 	runqsize int32
 
+	// yieldq holds goroutines that voluntarily yielded due to the scheduler
+	// reporting capacity exhaustion. These were (are) runnable, but have moved to
+	// waiting while they "block" on "spare" scheduler capacity opening up. Does NOT
+	// contribute to runqsize.
+	yieldq     gQueue
+	yieldqsize int32
+
 	// disable controls selective disabling of the scheduler.
 	//
 	// Use schedEnableUser to control this.
@@ -1099,6 +1112,7 @@ const (
 	waitReasonTraceProcStatus    // "trace proc status"
 	waitReasonPageTraceFlush     // "page trace flush"
 	waitReasonCoroutine          // "coroutine"
+	waitReasonYield              // "yield"
 	waitReasonGCWeakToStrongWait // "GC weak to strong wait"
 )

@@ -1140,6 +1154,7 @@ var waitReasonStrings = [...]string{
 	waitReasonTraceProcStatus:    "trace proc status",
 	waitReasonPageTraceFlush:     "page trace flush",
 	waitReasonCoroutine:          "coroutine",
+	waitReasonYield:              "yield",
 	waitReasonGCWeakToStrongWait: "GC weak to strong wait",
 }
