Commit e673937

runtime: add runtime.Yield

1 parent ec86954 commit e673937

File tree: 2 files changed, +150 -0 lines changed

src/runtime/proc.go

Lines changed: 140 additions & 0 deletions
@@ -353,6 +353,102 @@ func Gosched() {
 	mcall(gosched_m)
 }
 
+// Yield cooperatively yields if, and only if, the scheduler is "busy".
+//
+// This can be called by any work wishing to utilize strictly spare capacity
+// while minimizing the degree to which it delays other work from being promptly
+// scheduled.
+//
+// Yield is intended to be very low overhead, particularly in the no-op case
+// where the scheduler is not at capacity, to ensure it can be called often
+// enough in tasks wishing to yield promptly to waiting work when needed.
+// When the scheduler is busy, the yielded goroutine can be parked in a
+// waiting state until the scheduler has idle capacity again to resume it.
+//
+// A goroutine calling Yield may be parked in a yielded state for an arbitrary
+// amount of time as long as the scheduler remains busy; callers should consider
+// this when deciding where to and where not to yield, such as considering when
+// locks that are contended by other work might be held.
+//
+// Yield will never park if the calling goroutine is locked to an OS thread.
+//
+//go:nosplit
+func Yield() {
+	// Common/fast case: do nothing if haswork is zero. Doing only this check here
+	// and leaving more detailed decisions to yield_slow keeps this wrapper
+	// inlineable (complexity cost as of writing is 70 out of the allowed 80).
+	if sched.haswork.Load() != 0 {
+		yield_slow()
+	}
+}
+
+// yield_slow is intended to be called after a check of haswork suggests that
+// yielding would be appreciated; it decides how to actually yield (hop onto the
+// P's local runq vs park in the yieldq). It is split out to ensure Yield() and
+// its cheap check of haswork remain inlineable.
+//
+// If there is work on the local runq, the cheapest option is to just hop behind
+// it in the local runq to let it run and then pick back up. However this will
+// end up thrashing if the work we yield to also then yields right back. We
+// don't mark goroutines in any way when they yield so we cannot directly detect
+// if the next goroutine in our local runq got there via a yield/will yield
+// back, so we can use a heuristic: if we ran for <100us, it is possible we are
+// thrashing so we can go park in the yieldq to let the remaining local runq
+// work drain.
+//
+// If there is no work in the local and global run queues but haswork got us
+// here, it is likely there is work on a different P's local queue: we could
+// immediately park in the yieldq to free this P to go try to steal, but we
+// would prefer that the work currently running on that P yield to it (or
+// finish/block/be preempted) instead of parking this work, stealing that work,
+// and then unparking this work again.
+//
+// At the same time, we *do* want to yield -- that's why we are here -- if there
+// is work waiting for a chance to run. We can balance our preference to give
+// the other P a chance to just run it vs not making it wait too long with a
+// heuristic: an ideal one might use how long that work has been waiting (either
+// by changing haswork to be a time or by locally remembering when/how many times
+// we see it non-zero), but a simple rule that uses the existing fields for now
+// is just to go park if we have been running for 1ms: this bounds how long we
+// defer parking (to at most 1ms) and while we might park immediately if we were
+// already running >1ms before haswork was set, at least the fact we ran for 1ms
+// means the overhead of parking and unparking may be proportionally lower.
+//
+// If the global runq has work, we always park right away, as unlike the other-P
+// local runq case, there isn't a P we think is better suited to running it, so
+// we should just do it.
+func yield_slow() {
+	gp := getg()
+
+	running := nanotime() - gp.lastsched
+	if !runqempty(gp.m.p.ptr()) {
+		if running > 100_000 {
+			goyield()
+			return
+		}
+	} else if sched.runqsize == 0 && running < 1_000_000 { // 1ms.
+		return
+	}
+
+	// Don't park while locked to an OS thread.
+	if gp.lockedm != 0 {
+		return
+	}
+
+	// Eagerly decrement haswork rather than waiting for findRunnable to reset it
+	// to try to minimize a thundering herd of yields, but if this detects that it
+	// raced with a reset/other decrement, restore the count to 0 and don't yield.
+	if sched.haswork.Add(-1) < 0 {
+		sched.haswork.Store(0)
+		return
+	}
+
+	checkTimeouts()
+	// traceskip=1 so stacks show runtime.Yield
+	gopark(yield_put, nil, waitReasonYield, traceBlockPreempted, 1)
+}
+
 // goschedguarded yields the processor like gosched, but also checks
 // for forbidden states and opts out of the yield in those cases.
 //
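
The Yield doc comment above describes the intended call pattern. Purely as an illustration (not part of this commit), a low-priority task might use it roughly like the sketch below; this assumes the patch is applied, since runtime.Yield does not exist in released Go, and processBatch is a hypothetical name.

package main

import (
	"fmt"
	"runtime"
)

// processBatch is a hypothetical low-priority task that only wants to use
// spare scheduler capacity. It calls runtime.Yield between cheap units of
// work; per the doc comment above, this is a no-op unless the scheduler is
// busy, in which case the goroutine parks until there is idle capacity again.
func processBatch(items []int) int {
	sum := 0
	for _, v := range items {
		sum += v * v // stand-in for a short unit of real work

		// Cheap in the common case; may park on the yieldq when
		// sched.haswork is non-zero.
		runtime.Yield()
	}
	return sum
}

func main() {
	items := make([]int, 1_000)
	for i := range items {
		items[i] = i
	}
	fmt.Println(processBatch(items))
}
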
@@ -3165,6 +3261,7 @@ func wakep() {
 	lock(&sched.lock)
 	pp, _ = pidlegetSpinning(0)
 	if pp == nil {
+		sched.haswork.Add(1)
 		if sched.nmspinning.Add(-1) < 0 {
 			throw("wakep: negative nmspinning")
 		}
@@ -3445,6 +3542,29 @@ top:
 		}
 	}
 
+	sched.haswork.Store(0)
+
+	// As a last resort before we give up the P, try yieldq.
+	if sched.yieldqsize != 0 {
+		lock(&sched.lock)
+		bg := sched.yieldq.pop()
+		if bg != nil {
+			sched.yieldqsize--
+		}
+		unlock(&sched.lock)
+		if bg != nil {
+			// Transition from _Gwaiting (yield) to _Grunnable.
+			trace := traceAcquire()
+			casgstatus(bg, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				// Match other ready paths for trace visibility.
+				trace.GoUnpark(bg, 0)
+				traceRelease(trace)
+			}
+			return bg, false, false
+		}
+	}
+
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
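
As a rough user-space analogue of the drain order above (illustrative only; the queue type and names here are invented, not runtime code): regular work is always preferred, and the yielded queue is consulted only as a last resort before the worker would otherwise go idle.

package main

import (
	"fmt"
	"sync"
)

// twoQueues mimics the ordering findRunnable uses above: take from the
// regular run queue first, and only fall back to the yielded queue when
// there is nothing else to do.
type twoQueues struct {
	mu     sync.Mutex
	runq   []string // regular runnable work
	yieldq []string // work that yielded while things were busy
}

func (q *twoQueues) next() (string, bool) {
	q.mu.Lock()
	defer q.mu.Unlock()
	if len(q.runq) > 0 {
		t := q.runq[0]
		q.runq = q.runq[1:]
		return t, true
	}
	// Last resort before going idle: resume yielded work.
	if len(q.yieldq) > 0 {
		t := q.yieldq[0]
		q.yieldq = q.yieldq[1:]
		return t, true
	}
	return "", false
}

func main() {
	q := &twoQueues{
		runq:   []string{"req-1", "req-2"},
		yieldq: []string{"background-scan"},
	}
	for t, ok := q.next(); ok; t, ok = q.next() {
		fmt.Println("run", t) // req-1, req-2, then background-scan
	}
}
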
@@ -3509,6 +3629,10 @@ top:
 		unlock(&sched.lock)
 		return gp, false, false
 	}
+	if sched.yieldqsize != 0 {
+		unlock(&sched.lock)
+		goto top
+	}
 	if !mp.spinning && sched.needspinning.Load() == 1 {
 		// See "Delicate dance" comment below.
 		mp.becomeSpinning()
@@ -3666,6 +3790,7 @@ top:
 	unlock(&sched.lock)
 	if pp == nil {
 		injectglist(&list)
+		sched.haswork.Add(1)
 		netpollAdjustWaiters(delta)
 	} else {
 		acquirep(pp)
@@ -4889,6 +5014,7 @@ func exitsyscall0(gp *g) {
 	var locked bool
 	if pp == nil {
 		globrunqput(gp)
+		sched.haswork.Add(1)
 
 		// Below, we stoplockedm if gp is locked. globrunqput releases
 		// ownership of gp, so we must check if gp is locked prior to
@@ -7111,6 +7237,20 @@ func (q *gQueue) popList() gList {
 	return stack
 }
 
+// yield_put is the gopark unlock function for Yield. It enqueues the goroutine
+// onto the global yield queue. Returning true keeps the G parked until another
+// part of the scheduler makes it runnable again. The G remains in _Gwaiting
+// after this returns.
+//
+//go:nosplit
+func yield_put(gp *g, _ unsafe.Pointer) bool {
+	lock(&sched.lock)
+	sched.yieldq.pushBack(gp)
+	sched.yieldqsize++
+	unlock(&sched.lock)
+	return true
+}
+
 // A gList is a list of Gs linked through g.schedlink. A G can only be
 // on one gQueue or gList at a time.
 type gList struct {
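
A loose user-space analogue of this park/resume lifecycle, illustrative only and using channels plus a mutex in place of gopark and sched.lock: a goroutine enqueues a handle for itself and blocks, and whoever later drains the queue unblocks it, mirroring how yield_put parks the G on the yieldq until findRunnable pops it.

package main

import (
	"fmt"
	"sync"
)

// yieldQueue is an illustrative stand-in for sched.yieldq: parked waiters
// are held here until something with spare capacity resumes them.
type yieldQueue struct {
	mu      sync.Mutex
	waiters []chan struct{}
}

// park registers the caller and blocks until it is resumed, loosely
// mirroring gopark(yield_put, ...): enqueue under the lock, then wait.
func (q *yieldQueue) park() {
	ch := make(chan struct{})
	q.mu.Lock()
	q.waiters = append(q.waiters, ch)
	q.mu.Unlock()
	<-ch // blocked in a "yielded" state
}

// resumeOne wakes the oldest parked waiter, if any, loosely mirroring the
// yieldq pop in findRunnable.
func (q *yieldQueue) resumeOne() bool {
	q.mu.Lock()
	defer q.mu.Unlock()
	if len(q.waiters) == 0 {
		return false
	}
	close(q.waiters[0])
	q.waiters = q.waiters[1:]
	return true
}

func main() {
	var q yieldQueue
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		q.park()
		fmt.Println("resumed after yield")
	}()
	// Keep trying until the goroutine has parked, then resume it.
	for !q.resumeOne() {
	}
	wg.Wait()
}
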

src/runtime/runtime2.go

Lines changed: 10 additions & 0 deletions
@@ -781,6 +781,10 @@ type schedt struct {
 	lastpoll  atomic.Int64 // time of last network poll, 0 if currently polling
 	pollUntil atomic.Int64 // time to which current poll is sleeping
 
+	// haswork is set when work is queued but no idle P is available.
+	// It signals scheduler saturation for yield decisions.
+	haswork atomic.Int32
+
 	lock mutex
 
 	// When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
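
As a standalone sketch of how this counter is meant to be used (assumed names, and sync/atomic in place of the runtime's internal atomic package): producers note saturation when queued work finds no idle capacity, findRunnable-style consumers clear it, and yielders eagerly decrement it, clamping at zero if they race with a reset, as described in yield_slow above.

package main

import (
	"fmt"
	"sync/atomic"
)

// haswork is an illustrative stand-in for sched.haswork: producers bump it
// when runnable work piles up with no idle worker, consumers reset it, and
// yielders eagerly decrement it (clamping at zero if they race with a reset).
var haswork atomic.Int32

// noteBacklog mirrors the wakep/exitsyscall0 call sites above: record that
// work was queued but nothing idle could take it.
func noteBacklog() { haswork.Add(1) }

// clearBacklog mirrors findRunnable's Store(0) once the backlog is handled.
func clearBacklog() { haswork.Store(0) }

// shouldYield mirrors the decrement-and-clamp in yield_slow: take credit for
// absorbing one unit of backlog, but restore zero if we raced with a reset.
func shouldYield() bool {
	if haswork.Load() == 0 {
		return false
	}
	if haswork.Add(-1) < 0 {
		haswork.Store(0)
		return false
	}
	return true
}

func main() {
	noteBacklog()
	noteBacklog()
	fmt.Println(shouldYield()) // true: one unit of backlog absorbed
	clearBacklog()
	fmt.Println(shouldYield()) // false: backlog already cleared
}
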
@@ -804,6 +808,10 @@ type schedt struct {
 	// Global runnable queue.
 	runq     gQueue
 	runqsize int32
+	// Global background-yield queue: goroutines that voluntarily yielded
+	// while the scheduler was busy. Does NOT contribute to runqsize.
+	yieldq     gQueue
+	yieldqsize int32
 
 	// disable controls selective disabling of the scheduler.
 	//
@@ -1099,6 +1107,7 @@ const (
 	waitReasonTraceProcStatus    // "trace proc status"
 	waitReasonPageTraceFlush     // "page trace flush"
 	waitReasonCoroutine          // "coroutine"
+	waitReasonYield              // "yield"
 	waitReasonGCWeakToStrongWait // "GC weak to strong wait"
 )

@@ -1140,6 +1149,7 @@ var waitReasonStrings = [...]string{
 	waitReasonTraceProcStatus:    "trace proc status",
 	waitReasonPageTraceFlush:     "page trace flush",
 	waitReasonCoroutine:          "coroutine",
+	waitReasonYield:              "yield",
 	waitReasonGCWeakToStrongWait: "GC weak to strong wait",
 }
