
Commit 9ab6c45

sched_ext: Track tasks that are subjects of the in-flight SCX operation
When some SCX operations are in flight, it is known that the subject task's rq lock is held throughout, which makes it safe to access certain fields of the task - e.g. its current task_group. We want to add SCX kfunc helpers that can make use of this guarantee - e.g. to help determine the currently associated CPU cgroup from the task's current task_group. As it'd be dangerous to call such a helper on a task which isn't rq-lock protected, the helper should be able to verify the input task and reject it accordingly.

This patch adds sched_ext_entity.kf_tasks[], which tracks the tasks that are currently being operated on by a terminal SCX operation. The new SCX_CALL_OP_[2]TASK[_RET]() macros can be used when invoking SCX operations which take tasks as arguments, and scx_kf_allowed_on_arg_tasks() can be used by kfunc helpers to verify the input task's status.

Note that as sched_ext_entity.kf_tasks[] can't handle nesting, the tracking is currently limited to terminal SCX operations. If needed in the future, this restriction can be removed by moving the tracking to the task side with a couple of per-task counters.

Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: David Vernet <[email protected]>
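As a rough illustration of the intended use (not part of this patch; the helper name and the cgroup lookup are hypothetical), a kfunc helper guarded by scx_kf_allowed_on_arg_tasks() might look like:

/*
 * Hypothetical sketch: return the cgroup ID of @p's CPU cgroup.
 * Only legal when @p is a subject of the in-flight terminal SCX
 * operation, i.e. when @p's rq lock is known to be held.
 */
u64 scx_bpf_task_cgroup_id(struct task_struct *p)
{
	if (!scx_kf_allowed_on_arg_tasks(__SCX_KF_RQ_LOCKED, p))
		return 0;

	/* p->sched_task_group is stable while p's rq lock is held */
	return cgroup_id(task_group(p)->css.cgroup);
}

A task that is not currently tracked in kf_tasks[] is rejected and the violation is reported via scx_ops_error().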
1 parent ee794ea commit 9ab6c45

2 files changed: +76, -17 lines

include/linux/sched/ext.h

Lines changed: 2 additions & 0 deletions
@@ -449,6 +449,7 @@ enum scx_kf_mask {
 	SCX_KF_REST		= 1 << 5, /* other rq-locked operations */
 
 	__SCX_KF_RQ_LOCKED	= SCX_KF_DISPATCH | SCX_KF_ENQUEUE | SCX_KF_REST,
+	__SCX_KF_TERMINAL	= SCX_KF_ENQUEUE | SCX_KF_REST,
 };
 
 /*
@@ -464,6 +465,7 @@ struct sched_ext_entity {
 	s32			sticky_cpu;
 	s32			holding_cpu;
 	u32			kf_mask;	/* see scx_kf_mask above */
+	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
 	atomic64_t		ops_state;
 	unsigned long		runnable_at;
kernel/sched/ext.c

Lines changed: 74 additions & 17 deletions
@@ -239,6 +239,47 @@ do {									\
 	__ret;								\
 })
 
+/*
+ * Some kfuncs are allowed only on the tasks that are subjects of the
+ * in-progress scx_ops operation for, e.g., locking guarantees. To enforce such
+ * restrictions, the following SCX_CALL_OP_*() variants should be used when
+ * invoking scx_ops operations that take task arguments. These can only be used
+ * for non-nesting operations due to the way the tasks are tracked.
+ *
+ * kfuncs which can only operate on such tasks can in turn use
+ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
+ * the specific task.
+ */
+#define SCX_CALL_OP_TASK(mask, op, task, args...)			\
+do {									\
+	BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task;				\
+	SCX_CALL_OP(mask, op, task, ##args);				\
+	current->scx.kf_tasks[0] = NULL;				\
+} while (0)
+
+#define SCX_CALL_OP_TASK_RET(mask, op, task, args...)			\
+({									\
+	__typeof__(scx_ops.op(task, ##args)) __ret;			\
+	BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task;				\
+	__ret = SCX_CALL_OP_RET(mask, op, task, ##args);		\
+	current->scx.kf_tasks[0] = NULL;				\
+	__ret;								\
+})
+
+#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...)	\
+({									\
+	__typeof__(scx_ops.op(task0, task1, ##args)) __ret;		\
+	BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task0;				\
+	current->scx.kf_tasks[1] = task1;				\
+	__ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args);	\
+	current->scx.kf_tasks[0] = NULL;				\
+	current->scx.kf_tasks[1] = NULL;				\
+	__ret;								\
+})
+
 /* @mask is constant, always inline to cull unnecessary branches */
 static __always_inline bool scx_kf_allowed(u32 mask)
 {
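For reference, a call such as SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags) (used in enqueue_task_scx() below) is simply the existing SCX_CALL_OP() bracketed by the kf_tasks[0] bookkeeping, i.e. it expands roughly to:

	do {
		BUILD_BUG_ON(SCX_KF_REST & ~__SCX_KF_TERMINAL);	/* REST ops are terminal */
		current->scx.kf_tasks[0] = p;	/* p becomes a valid argument for guarded kfuncs */
		SCX_CALL_OP(SCX_KF_REST, runnable, p, enq_flags);
		current->scx.kf_tasks[0] = NULL;	/* the window closes when the op returns */
	} while (0);

Because kf_tasks[0] is unconditionally cleared afterwards, nested uses would corrupt the tracking, which is why the BUILD_BUG_ON() restricts these macros to terminal (ENQUEUE/REST) operations.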
@@ -269,6 +310,22 @@ static __always_inline bool scx_kf_allowed(u32 mask)
 	return true;
 }
 
+/* see SCX_CALL_OP_TASK() */
+static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
+							struct task_struct *p)
+{
+	if (!scx_kf_allowed(__SCX_KF_RQ_LOCKED))
+		return false;
+
+	if (unlikely((p != current->scx.kf_tasks[0] &&
+		      p != current->scx.kf_tasks[1]))) {
+		scx_ops_error("called on a task not being operated on");
+		return false;
+	}
+
+	return true;
+}
+
 /**
  * scx_task_iter_init - Initialize a task iterator
  * @iter: iterator to init
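On the BPF scheduler side, the net effect is that a guarded kfunc may only be called on the tasks the current operation was invoked on. A hedged sketch of an ops.enqueue() implementation (example_enqueue and scx_bpf_task_cgroup_id() are hypothetical; the dispatch call follows the style of the sched_ext example schedulers):

	void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
	{
		/* allowed: p is tracked in current->scx.kf_tasks[] for this op's duration */
		u64 cgid = scx_bpf_task_cgroup_id(p);

		bpf_printk("enqueue: pid=%d cgroup=%llu", p->pid, cgid);
		scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
	}

Calling the same helper on any other task, e.g. one looked up through a BPF iterator, would fail the kf_tasks[] check above and trigger scx_ops_error().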
@@ -706,7 +763,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	WARN_ON_ONCE(*ddsp_taskp);
 	*ddsp_taskp = p;
 
-	SCX_CALL_OP(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
+	SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
 
 	/*
 	 * If not directly dispatched, QUEUEING isn't clear yet and dispatch or
@@ -778,7 +835,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
 	add_nr_running(rq, 1);
 
 	if (SCX_HAS_OP(runnable))
-		SCX_CALL_OP(SCX_KF_REST, runnable, p, enq_flags);
+		SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
 
 	do_enqueue_task(rq, p, enq_flags, sticky_cpu);
 }
@@ -803,7 +860,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
 		BUG();
 	case SCX_OPSS_QUEUED:
 		if (SCX_HAS_OP(dequeue))
-			SCX_CALL_OP(SCX_KF_REST, dequeue, p, deq_flags);
+			SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
 
 		if (atomic64_try_cmpxchg(&p->scx.ops_state, &opss,
					 SCX_OPSS_NONE))
@@ -854,11 +911,11 @@ static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
 	 */
 	if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
 		update_curr_scx(rq);
-		SCX_CALL_OP(SCX_KF_REST, stopping, p, false);
+		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
 	}
 
 	if (SCX_HAS_OP(quiescent))
-		SCX_CALL_OP(SCX_KF_REST, quiescent, p, deq_flags);
+		SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
 
 	if (deq_flags & SCX_DEQ_SLEEP)
 		p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -877,7 +934,7 @@ static void yield_task_scx(struct rq *rq)
 	struct task_struct *p = rq->curr;
 
 	if (SCX_HAS_OP(yield))
-		SCX_CALL_OP_RET(SCX_KF_REST, yield, p, NULL);
+		SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
 	else
 		p->scx.slice = 0;
 }
@@ -887,7 +944,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 	struct task_struct *from = rq->curr;
 
 	if (SCX_HAS_OP(yield))
-		return SCX_CALL_OP_RET(SCX_KF_REST, yield, from, to);
+		return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
 	else
 		return false;
 }
@@ -1398,7 +1455,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP(SCX_KF_REST, running, p);
+		SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
 
 	watchdog_unwatch_task(p, true);
 
@@ -1454,7 +1511,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p)
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP(SCX_KF_REST, stopping, p, true);
+		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
 
 	/*
	 * If we're being called from put_prev_task_balance(), balance_scx() may
@@ -1617,8 +1674,8 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 	if (SCX_HAS_OP(select_cpu)) {
 		s32 cpu;
 
-		cpu = SCX_CALL_OP_RET(SCX_KF_REST, select_cpu, p, prev_cpu,
-				      wake_flags);
+		cpu = SCX_CALL_OP_TASK_RET(SCX_KF_REST, select_cpu, p, prev_cpu,
+					   wake_flags);
 		if (ops_cpu_valid(cpu)) {
 			return cpu;
 		} else {
@@ -1644,8 +1701,8 @@ static void set_cpus_allowed_scx(struct task_struct *p,
 	 * designation pointless. Cast it away when calling the operation.
 	 */
 	if (SCX_HAS_OP(set_cpumask))
-		SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
-			    (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+				 (struct cpumask *)p->cpus_ptr);
 }
 
 static void reset_idle_masks(void)
@@ -1806,7 +1863,7 @@ static void scx_ops_enable_task(struct task_struct *p)
 
 	if (SCX_HAS_OP(enable)) {
 		struct scx_enable_args args = { };
-		SCX_CALL_OP(SCX_KF_REST, enable, p, &args);
+		SCX_CALL_OP_TASK(SCX_KF_REST, enable, p, &args);
 	}
 	p->scx.flags &= ~SCX_TASK_OPS_PREPPED;
 	p->scx.flags |= SCX_TASK_OPS_ENABLED;
@@ -1845,7 +1902,7 @@ static void refresh_scx_weight(struct task_struct *p)
 
 	p->scx.weight = sched_weight_to_cgroup(weight);
 	if (SCX_HAS_OP(set_weight))
-		SCX_CALL_OP(SCX_KF_REST, set_weight, p, p->scx.weight);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
 }
 
 void scx_pre_fork(struct task_struct *p)
@@ -1936,8 +1993,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
 	 * different scheduler class. Keep the BPF scheduler up-to-date.
 	 */
 	if (SCX_HAS_OP(set_cpumask))
-		SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
-			    (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+				 (struct cpumask *)p->cpus_ptr);
 }
 
 static void check_preempt_curr_scx(struct rq *rq, struct task_struct *p, int wake_flags) {}
