@@ -353,6 +353,102 @@ func Gosched() {
 	mcall(gosched_m)
 }
 
+// Yield cooperatively yields if, and only if, the scheduler is "busy".
+//
+// This can be called by any work wishing to utilize strictly spare capacity
+// while minimizing the degree to which it delays other work from being
+// promptly scheduled.
+//
+// Yield is intended to be very low overhead, particularly in the no-op case
+// where the scheduler is not at capacity, to ensure it can be called often
+// enough in tasks wishing to yield promptly to waiting work when needed. When
+// the scheduler is busy, the yielded goroutine can be parked in a waiting
+// state until the scheduler has idle capacity again to resume it.
+//
+// A goroutine calling Yield may be parked in a yielded state for an arbitrary
+// amount of time as long as the scheduler remains busy; callers should
+// consider this when deciding where to yield and where not to, for example
+// while holding locks that other work may contend on.
+//
+// Yield will never park if the calling goroutine is locked to an OS thread.
+//
+//go:nosplit
+func Yield() {
+	// Common/fast case: do nothing if haswork is zero. Doing only this check
+	// here and leaving more detailed decisions to yield_slow keeps this
+	// wrapper inlineable (complexity cost as of writing is 70 out of the
+	// allowed 80).
+	if sched.haswork.Load() != 0 {
+		yield_slow()
+	}
+}
+
+// yield_slow is called after a check of haswork suggests that yielding would
+// be appreciated; it decides how to actually yield (hop to the P's local runq
+// vs park in the yieldq). It is split out to ensure Yield() and its cheap
+// check of haswork remain inlineable.
+//
+// If there is work on the local runq, the cheapest option is to just hop
+// behind it in the local runq to let it run and then pick back up. However,
+// this will end up thrashing if the work we yield to then yields right back.
+// We don't mark goroutines in any way when they yield, so we cannot directly
+// detect whether the next goroutine in our local runq got there via a yield
+// or will yield back; instead we use a heuristic: if we ran for <100us, it is
+// possible we are thrashing, so we park in the yieldq to let the remaining
+// local runq work drain.
+//
+// If there is no work in the local and global run queues but haswork got us
+// here, it is likely there is work on a different P's local queue: we could
+// immediately park in the yieldq to free this P to go try to steal, but we
+// would prefer that the work currently running on that P yield to it (or
+// finish/block/be preempted) instead of parking this work, stealing that
+// work, and then unparking this work again.
+//
+// At the same time, we *do* want to yield -- that's why we are here -- if
+// there is work waiting for a chance to run. We can balance our preference to
+// give the other P a chance to just run it against not making it wait too
+// long with a heuristic: an ideal one might use how long that work has been
+// waiting (either by changing haswork to be a time or by locally remembering
+// when/how many times we see it non-zero), but a simple rule that uses the
+// existing fields for now is to just go park if we have been running for 1ms:
+// this bounds how long we defer parking (to at most 1ms), and while we might
+// park immediately if we were already running >1ms before haswork was set, at
+// least the fact we ran for 1ms means the overhead of parking and unparking
+// may be proportionally lower.
+//
+// If the global runq has work, we always park right away: unlike the other-P
+// local runq case, there isn't a P we think is better suited to running it,
+// so we should just do it.
+func yield_slow() {
+	gp := getg()
+
+	running := nanotime() - gp.lastsched
+	if !runqempty(gp.m.p.ptr()) {
+		if running > 100_000 {
+			goyield()
+			return
+		}
+	} else if sched.runqsize == 0 && running < 1_000_000 { // 1ms.
+		return
+	}
+
+	// Don't park while locked to an OS thread.
+	if gp.lockedm != 0 {
+		return
+	}
+
+	// Eagerly decrement haswork rather than waiting for findRunnable to reset
+	// it to try to minimize a thundering herd of yields, but if this detects
+	// that it raced with a reset/other decrement, restore the count to 0 and
+	// don't yield.
+	if sched.haswork.Add(-1) < 0 {
+		sched.haswork.Store(0)
+		return
+	}
+
+	checkTimeouts()
+	// traceskip=1 so stacks show runtime.Yield.
+	gopark(yield_put, nil, waitReasonYield, traceBlockPreempted, 1)
+}
+
 // goschedguarded yields the processor like gosched, but also checks
 // for forbidden states and opts out of the yield in those cases.
 //
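
For illustration, here is a minimal sketch of how a caller might use this new API once the patch is in a toolchain; backgroundScan, chunk, and processChunk are hypothetical stand-ins for the caller's own low-priority work, not part of the change.

package main

import "runtime"

type chunk []byte

// processChunk stands in for one short unit of low-priority work.
func processChunk(c chunk) { _ = len(c) }

// backgroundScan soaks up spare CPU but steps aside whenever the scheduler
// reports waiting work. Keeping each unit of work short keeps Yield calls
// frequent, which is what makes yielding prompt.
func backgroundScan(chunks []chunk) {
	for _, c := range chunks {
		processChunk(c)
		// No-op when the scheduler has spare capacity; may park this
		// goroutine in the yield queue until capacity frees up again.
		runtime.Yield()
	}
}

func main() {
	backgroundScan(make([]chunk, 1024))
}
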
@@ -3165,6 +3261,7 @@ func wakep() {
 	lock(&sched.lock)
 	pp, _ = pidlegetSpinning(0)
 	if pp == nil {
+		sched.haswork.Add(1)
 		if sched.nmspinning.Add(-1) < 0 {
 			throw("wakep: negative nmspinning")
 		}
@@ -3445,6 +3542,29 @@ top:
 		}
 	}
 
+	sched.haswork.Store(0)
+
+	// As a last resort before we give up the P, try yieldq.
+	if sched.yieldqsize != 0 {
+		lock(&sched.lock)
+		bg := sched.yieldq.pop()
+		if bg != nil {
+			sched.yieldqsize--
+		}
+		unlock(&sched.lock)
+		if bg != nil {
+			// Transition from _Gwaiting (yield) to _Grunnable.
+			trace := traceAcquire()
+			casgstatus(bg, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				// Match other ready paths for trace visibility.
+				trace.GoUnpark(bg, 0)
+				traceRelease(trace)
+			}
+			return bg, false, false
+		}
+	}
+
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
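
Taken together with the wakep and Yield changes above, these hunks implement a small signaling protocol around a counter: wakep bumps haswork when runnable work appears and no idle P is available, Yield checks and decrements it before parking, and findRunnable clears it once a P is about to go idle and will look for that work itself. A rough standalone sketch of that counter discipline, using made-up names outside the runtime, might look like:

package main

import "sync/atomic"

// haswork mimics sched.haswork: a hint that runnable work is waiting and no
// idle worker was available to take it.
var haswork atomic.Int64

// announceWork plays the role of wakep failing to find an idle P.
func announceWork() { haswork.Add(1) }

// goingIdle plays the role of findRunnable clearing the hint just before a
// worker goes idle (it will pick up any waiting work itself).
func goingIdle() { haswork.Store(0) }

// maybeStepAside plays the role of Yield/yield_slow: consume one unit of the
// hint before parking, and undo the decrement if it raced with a reset or
// another decrement.
func maybeStepAside(park func()) {
	if haswork.Load() == 0 {
		return // fast path: nobody is waiting
	}
	if haswork.Add(-1) < 0 {
		haswork.Store(0)
		return
	}
	park()
}

func main() {
	announceWork()
	maybeStepAside(func() { /* would gopark onto the yield queue here */ })
	goingIdle()
}
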
@@ -3509,6 +3629,10 @@ top:
 		unlock(&sched.lock)
 		return gp, false, false
 	}
+	if sched.yieldqsize != 0 {
+		unlock(&sched.lock)
+		goto top
+	}
 	if !mp.spinning && sched.needspinning.Load() == 1 {
 		// See "Delicate dance" comment below.
 		mp.becomeSpinning()
@@ -3666,6 +3790,7 @@ top:
 		unlock(&sched.lock)
 		if pp == nil {
 			injectglist(&list)
+			sched.haswork.Add(1)
 			netpollAdjustWaiters(delta)
 		} else {
 			acquirep(pp)
@@ -4889,6 +5014,7 @@ func exitsyscall0(gp *g) {
 	var locked bool
 	if pp == nil {
 		globrunqput(gp)
+		sched.haswork.Add(1)
 
 		// Below, we stoplockedm if gp is locked. globrunqput releases
 		// ownership of gp, so we must check if gp is locked prior to
@@ -7111,6 +7237,20 @@ func (q *gQueue) popList() gList {
 	return stack
 }
 
+// yield_put is the gopark unlock function for Yield. It enqueues the goroutine
+// onto the global yield queue. Returning true keeps the G parked until another
+// part of the scheduler makes it runnable again. The G remains in _Gwaiting
+// after this returns.
+//
+//go:nosplit
+func yield_put(gp *g, _ unsafe.Pointer) bool {
+	lock(&sched.lock)
+	sched.yieldq.pushBack(gp)
+	sched.yieldqsize++
+	unlock(&sched.lock)
+	return true
+}
+
 // A gList is a list of Gs linked through g.schedlink. A G can only be
 // on one gQueue or gList at a time.
 type gList struct {
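
Not shown in these hunks are the declarations the new code relies on (sched.haswork, sched.yieldq, sched.yieldqsize, and waitReasonYield). Inferred purely from how they are used above, the supporting declarations would presumably look roughly like the sketch below; the stand-in types, the placeholder waitReason value, and the package name are guesses, not part of the change.

// Sketch only: declarations inferred from usage in the diff; real types,
// values, and placement in the runtime will differ.
package sketch

import "sync/atomic"

type waitReason uint8

// The real constant would be a new entry in the runtime's waitReason enum and
// its string table; the value here is a placeholder.
const waitReasonYield waitReason = 0

// gQueue stands in for the runtime's intrusive queue of goroutines.
type gQueue struct{}

type schedt struct {
	// haswork hints that runnable work was seen with no idle P to take it;
	// findRunnable clears it when a P is about to go idle.
	haswork atomic.Int64

	// yieldq holds goroutines parked by Yield until findRunnable pops them
	// as a last resort before giving up the P.
	yieldq     gQueue
	yieldqsize int32
}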