rt_task() checks if a task has RT priority. But depends on your dictionary, this could mean it belongs to RT class, or is a 'realtime' task, which includes RT and DL classes. Since this has caused some confusion already on discussion [1], it seemed a clean up is due. I define the usage of rt_task() to be tasks that belong to RT class. Make sure that it returns true only for RT class and audit the users and replace them with the new realtime_task() which returns true for RT and DL classes - the old behavior. Introduce similar realtime_prio() to create similar distinction to rt_prio() and update the users. Move MAX_DL_PRIO to prio.h so it can be used in the new definitions. Document the functions to make it more obvious what is the difference between them. PI-boosted tasks is a factor that must be taken into account when choosing which function to use. Rename task_is_realtime() to task_has_realtime_policy() as the old name is confusing against the new realtime_task(). No functional changes were intended. [1] https://lore.kernel.org/lkml/20240506100509.GL40213@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/ Signed-off-by: Qais Yousef <qyousef@xxxxxxxxxxx> --- fs/select.c | 2 +- include/linux/ioprio.h | 2 +- include/linux/sched/deadline.h | 6 ++++-- include/linux/sched/prio.h | 1 + include/linux/sched/rt.h | 27 ++++++++++++++++++++++++++- kernel/locking/rtmutex.c | 4 ++-- kernel/locking/rwsem.c | 4 ++-- kernel/locking/ww_mutex.h | 2 +- kernel/sched/core.c | 6 +++--- kernel/time/hrtimer.c | 6 +++--- kernel/trace/trace_sched_wakeup.c | 2 +- mm/page-writeback.c | 4 ++-- mm/page_alloc.c | 2 +- 13 files changed, 48 insertions(+), 20 deletions(-) diff --git a/fs/select.c b/fs/select.c index 9515c3fa1a03..8d5c1419416c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -82,7 +82,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv) * Realtime tasks get a slack of 0 for obvious reasons. */ - if (rt_task(current)) + if (realtime_task(current)) return 0; ktime_get_ts64(&now); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index db1249cd9692..6c00342b6166 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task_is_realtime(task)) + else if (task_has_realtime_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index df3aca89d4f5..5cb88b748ad6 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,8 +10,6 @@ #include <linux/sched.h> -#define MAX_DL_PRIO 0 - static inline int dl_prio(int prio) { if (unlikely(prio < MAX_DL_PRIO)) @@ -19,6 +17,10 @@ static inline int dl_prio(int prio) return 0; } +/* + * Returns true if a task has a priority that belongs to DL class. PI-boosted + * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. + */ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index ab83d85e1183..6ab43b4f72f9 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -14,6 +14,7 @@ */ #define MAX_RT_PRIO 100 +#define MAX_DL_PRIO 0 #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index b2b9e6eb9683..b31be3c50152 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -7,18 +7,43 @@ struct task_struct; static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO)) + return 1; + return 0; +} + +static inline int realtime_prio(int prio) { if (unlikely(prio < MAX_RT_PRIO)) return 1; return 0; } +/* + * Returns true if a task has a priority that belongs to RT class. PI-boosted + * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. + */ static inline int rt_task(struct task_struct *p) { return rt_prio(p->prio); } -static inline bool task_is_realtime(struct task_struct *tsk) +/* + * Returns true if a task has a priority that belongs to RT or DL classes. + * PI-boosted tasks will return true. Use task_has_realtime_policy() to ignore + * PI-boosted tasks. + */ +static inline int realtime_task(struct task_struct *p) +{ + return realtime_prio(p->prio); +} + +/* + * Returns true if a task has a policy that belongs to RT or DL classes. + * PI-boosted tasks will return false. + */ +static inline bool task_has_realtime_policy(struct task_struct *tsk) { int policy = tsk->policy; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 88d08eeb8bc0..55c9dab37f33 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -347,7 +347,7 @@ static __always_inline int __waiter_prio(struct task_struct *task) { int prio = task->prio; - if (!rt_prio(prio)) + if (!realtime_prio(prio)) return DEFAULT_PRIO; return prio; @@ -435,7 +435,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, * Note that RT tasks are excluded from same priority (lateral) * steals to prevent the introduction of an unbounded latency. */ - if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio)) + if (realtime_prio(waiter->tree.prio)) return false; return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index c6d17aee4209..ad8d4438bc91 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -631,7 +631,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, * if it is an RT task or wait in the wait queue * for too long. */ - if (has_handoff || (!rt_task(waiter->task) && + if (has_handoff || (!realtime_task(waiter->task) && !time_after(jiffies, waiter->timeout))) return false; @@ -914,7 +914,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) if (owner_state != OWNER_WRITER) { if (need_resched()) break; - if (rt_task(current) && + if (realtime_task(current) && (prev_owner_state != OWNER_WRITER)) break; } diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 3ad2cc4823e5..fa4b416a1f62 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) int a_prio = a->task->prio; int b_prio = b->task->prio; - if (rt_prio(a_prio) || rt_prio(b_prio)) { + if (realtime_prio(a_prio) || realtime_prio(b_prio)) { if (a_prio > b_prio) return true; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a914388144a..27f15de3d099 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -162,7 +162,7 @@ static inline int __task_prio(const struct task_struct *p) if (p->sched_class == &stop_sched_class) /* trumps deadline */ return -2; - if (rt_prio(p->prio)) /* includes deadline */ + if (realtime_prio(p->prio)) /* includes deadline */ return p->prio; /* [-1, 99] */ if (p->sched_class == &idle_sched_class) @@ -2198,7 +2198,7 @@ static int effective_prio(struct task_struct *p) * keep the priority unchanged. Otherwise, update priority * to the normal priority: */ - if (!rt_prio(p->prio)) + if (!realtime_prio(p->prio)) return p->normal_prio; return p->prio; } @@ -10282,7 +10282,7 @@ void normalize_rt_tasks(void) schedstat_set(p->stats.sleep_start, 0); schedstat_set(p->stats.block_start, 0); - if (!dl_task(p) && !rt_task(p)) { + if (!realtime_task(p)) { /* * Renice negative nice level userspace * tasks back to 0: diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 70625dff62ce..4150e98847fa 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1996,7 +1996,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, * expiry. */ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) + if (task_has_realtime_policy(current) && !(mode & HRTIMER_MODE_SOFT)) mode |= HRTIMER_MODE_HARD; } @@ -2096,7 +2096,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, u64 slack; slack = current->timer_slack_ns; - if (rt_task(current)) + if (realtime_task(current)) slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); @@ -2301,7 +2301,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, * Override any slack passed by the user if under * rt contraints. */ - if (rt_task(current)) + if (realtime_task(current)) delta = 0; hrtimer_init_sleeper_on_stack(&t, clock_id, mode); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0469a04a355f..19d737742e29 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -545,7 +545,7 @@ probe_wakeup(void *ignore, struct task_struct *p) * - wakeup_dl handles tasks belonging to sched_dl class only. */ if (tracing_dl || (wakeup_dl && !dl_task(p)) || - (wakeup_rt && !dl_task(p) && !rt_task(p)) || + (wakeup_rt && !realtime_task(p)) || (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3e19b87049db..7372e40f225d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -418,7 +418,7 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) if (bg_thresh >= thresh) bg_thresh = thresh / 2; tsk = current; - if (rt_task(tsk)) { + if (realtime_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } @@ -468,7 +468,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) else dirty = vm_dirty_ratio * node_memory / 100; - if (rt_task(tsk)) + if (realtime_task(tsk)) dirty += dirty / 4; return dirty; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 14d39f34d336..0af24a60ade0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3877,7 +3877,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) */ if (alloc_flags & ALLOC_MIN_RESERVE) alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(current)) && in_task()) + } else if (unlikely(realtime_task(current)) && in_task()) alloc_flags |= ALLOC_MIN_RESERVE; alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags); -- 2.34.1