struct scx_dsq_node contains two data structure nodes to link the containing task to a DSQ and a flags field that is protected by the lock of the associated DSQ. One reason why they are grouped into a struct is to use the type independently as a cursor node when iterating tasks on a DSQ. However, when iterating, the cursor only needs to be linked on the FIFO list and the rb_node part ends up inflating the size of the iterator data structure unnecessarily making it potentially too expensive to place it on stack. Take ->priq and ->flags out of scx_dsq_node and put them in sched_ext_entity as ->dsq_priq and ->dsq_flags, respectively. scx_dsq_node is renamed to scx_dsq_list_node and the field names are renamed accordingly. This will help implementing DSQ task iterator that can be allocated on stack. No functional change intended. Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> Suggested-by: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: David Vernet <void@xxxxxxxxxxxxx> --- include/linux/sched/ext.h | 10 ++++---- init/init_task.c | 2 - kernel/sched/ext.c | 54 +++++++++++++++++++++++----------------------- 3 files changed, 33 insertions(+), 33 deletions(-) --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -121,10 +121,8 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; -struct scx_dsq_node { - struct list_head list; /* dispatch order */ - struct rb_node priq; /* p->scx.dsq_vtime order */ - u32 flags; /* SCX_TASK_DSQ_* flags */ +struct scx_dsq_list_node { + struct list_head node; }; /* @@ -133,7 +131,9 @@ struct scx_dsq_node { */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct scx_dsq_node dsq_node; /* protected by dsq lock */ + struct scx_dsq_list_node dsq_list; /* dispatch order */ + struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_flags; /* protected by DSQ lock */ u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; --- a/init/init_task.c +++ b/init/init_task.c @@ -102,7 +102,7 @@ struct task_struct init_task __aligned(L #endif #ifdef CONFIG_SCHED_CLASS_EXT .scx = { - .dsq_node.list = LIST_HEAD_INIT(init_task.scx.dsq_node.list), + .dsq_list.node = LIST_HEAD_INIT(init_task.scx.dsq_list.node), .sticky_cpu = -1, .holding_cpu = -1, .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node), --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1360,9 +1360,9 @@ static bool scx_dsq_priq_less(struct rb_ const struct rb_node *node_b) { const struct task_struct *a = - container_of(node_a, struct task_struct, scx.dsq_node.priq); + container_of(node_a, struct task_struct, scx.dsq_priq); const struct task_struct *b = - container_of(node_b, struct task_struct, scx.dsq_node.priq); + container_of(node_b, struct task_struct, scx.dsq_priq); return time_before64(a->scx.dsq_vtime, b->scx.dsq_vtime); } @@ -1378,9 +1378,9 @@ static void dispatch_enqueue(struct scx_ { bool is_local = dsq->id == SCX_DSQ_LOCAL; - WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.list)); - WARN_ON_ONCE((p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) || - !RB_EMPTY_NODE(&p->scx.dsq_node.priq)); + WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_list.node)); + WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) || + !RB_EMPTY_NODE(&p->scx.dsq_priq)); if (!is_local) { raw_spin_lock(&dsq->lock); @@ -1419,21 +1419,21 @@ static void dispatch_enqueue(struct scx_ scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); - p->scx.dsq_node.flags |= SCX_TASK_DSQ_ON_PRIQ; - rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); + p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; + rb_add(&p->scx.dsq_priq, &dsq->priq, scx_dsq_priq_less); /* * Find the previous task and insert after it on the list so * that @dsq->list is vtime ordered. */ - rbp = rb_prev(&p->scx.dsq_node.priq); + rbp = rb_prev(&p->scx.dsq_priq); if (rbp) { struct task_struct *prev = container_of(rbp, struct task_struct, - scx.dsq_node.priq); - list_add(&p->scx.dsq_node.list, &prev->scx.dsq_node.list); + scx.dsq_priq); + list_add(&p->scx.dsq_list.node, &prev->scx.dsq_list.node); } else { - list_add(&p->scx.dsq_node.list, &dsq->list); + list_add(&p->scx.dsq_list.node, &dsq->list); } } else { /* a FIFO DSQ shouldn't be using PRIQ enqueuing */ @@ -1442,9 +1442,9 @@ static void dispatch_enqueue(struct scx_ dsq->id); if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) - list_add(&p->scx.dsq_node.list, &dsq->list); + list_add(&p->scx.dsq_list.node, &dsq->list); else - list_add_tail(&p->scx.dsq_node.list, &dsq->list); + list_add_tail(&p->scx.dsq_list.node, &dsq->list); } dsq_mod_nr(dsq, 1); @@ -1487,18 +1487,18 @@ static void dispatch_enqueue(struct scx_ static void task_unlink_from_dsq(struct task_struct *p, struct scx_dispatch_q *dsq) { - if (p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) { - rb_erase(&p->scx.dsq_node.priq, &dsq->priq); - RB_CLEAR_NODE(&p->scx.dsq_node.priq); - p->scx.dsq_node.flags &= ~SCX_TASK_DSQ_ON_PRIQ; + if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) { + rb_erase(&p->scx.dsq_priq, &dsq->priq); + RB_CLEAR_NODE(&p->scx.dsq_priq); + p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; } - list_del_init(&p->scx.dsq_node.list); + list_del_init(&p->scx.dsq_list.node); } static bool task_linked_on_dsq(struct task_struct *p) { - return !list_empty(&p->scx.dsq_node.list); + return !list_empty(&p->scx.dsq_list.node); } static void dispatch_dequeue(struct rq *rq, struct task_struct *p) @@ -1523,8 +1523,8 @@ static void dispatch_dequeue(struct rq * raw_spin_lock(&dsq->lock); /* - * Now that we hold @dsq->lock, @p->holding_cpu and @p->scx.dsq_node - * can't change underneath us. + * Now that we hold @dsq->lock, @p->holding_cpu and @p->scx.dsq_* can't + * change underneath us. */ if (p->scx.holding_cpu < 0) { /* @p must still be on @dsq, dequeue */ @@ -2034,7 +2034,7 @@ static void consume_local_task(struct rq /* @dsq is locked and @p is on this rq */ WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); - list_add_tail(&p->scx.dsq_node.list, &rq->scx.local_dsq.list); + list_add_tail(&p->scx.dsq_list.node, &rq->scx.local_dsq.list); dsq_mod_nr(dsq, -1); dsq_mod_nr(&rq->scx.local_dsq, 1); p->scx.dsq = &rq->scx.local_dsq; @@ -2109,7 +2109,7 @@ retry: raw_spin_lock(&dsq->lock); - list_for_each_entry(p, &dsq->list, scx.dsq_node.list) { + list_for_each_entry(p, &dsq->list, scx.dsq_list.node) { struct rq *task_rq = task_rq(p); if (rq == task_rq) { @@ -2628,7 +2628,7 @@ static void put_prev_task_scx(struct rq static struct task_struct *first_local_task(struct rq *rq) { return list_first_entry_or_null(&rq->scx.local_dsq.list, - struct task_struct, scx.dsq_node.list); + struct task_struct, scx.dsq_list.node); } static struct task_struct *pick_next_task_scx(struct rq *rq) @@ -3308,8 +3308,8 @@ void init_scx_entity(struct sched_ext_en */ memset(scx, 0, offsetof(struct sched_ext_entity, tasks_node)); - INIT_LIST_HEAD(&scx->dsq_node.list); - RB_CLEAR_NODE(&scx->dsq_node.priq); + INIT_LIST_HEAD(&scx->dsq_list.node); + RB_CLEAR_NODE(&scx->dsq_priq); scx->sticky_cpu = -1; scx->holding_cpu = -1; INIT_LIST_HEAD(&scx->runnable_node); @@ -4158,7 +4158,7 @@ static void scx_dump_task(struct seq_buf jiffies_delta_msecs(p->scx.runnable_at, dctx->at_jiffies)); dump_line(s, " scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu", scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, - p->scx.dsq_node.flags, ops_state & SCX_OPSS_STATE_MASK, + p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu", p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf,