The following commit has been merged into the sched/core branch of tip: Commit-ID: f4dddf90d58d77b48492b775868af4041a217f4c Gitweb: https://git.kernel.org/tip/f4dddf90d58d77b48492b775868af4041a217f4c Author: Quentin Perret <qperret@xxxxxxxxxx> AuthorDate: Thu, 05 Aug 2021 11:21:54 +01:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Fri, 06 Aug 2021 14:25:25 +02:00 sched: Skip priority checks with SCHED_FLAG_KEEP_PARAMS SCHED_FLAG_KEEP_PARAMS can be passed to sched_setattr to specify that the call must not touch scheduling parameters (nice or priority). This is particularly handy for uclamp when used in conjunction with SCHED_FLAG_KEEP_POLICY as that allows to issue a syscall that only impacts uclamp values. However, sched_setattr always checks whether the priorities and nice values passed in sched_attr are valid first, even if those never get used down the line. This is useless at best since userspace can trivially bypass this check to set the uclamp values by specifying low priorities. However, it is cumbersome to do so as there is no single expression of this that skips both RT and CFS checks at once. As such, userspace needs to query the task policy first with e.g. sched_getattr and then set sched_attr.sched_priority accordingly. This is racy and slower than a single call. As the priority and nice checks are useless when SCHED_FLAG_KEEP_PARAMS is specified, simply inherit them in this case to match the policy inheritance of SCHED_FLAG_KEEP_POLICY. Reported-by: Wei Wang <wvw@xxxxxxxxxx> Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx> Reviewed-by: Qais Yousef <qais.yousef@xxxxxxx> Link: https://lore.kernel.org/r/20210805102154.590709-3-qperret@xxxxxxxxxx --- kernel/sched/core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df0480a..433b400 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7328,6 +7328,16 @@ err_size: return -E2BIG; } +static void get_params(struct task_struct *p, struct sched_attr *attr) +{ + if (task_has_dl_policy(p)) + __getparam_dl(p, attr); + else if (task_has_rt_policy(p)) + attr->sched_priority = p->rt_priority; + else + attr->sched_nice = task_nice(p); +} + /** * sys_sched_setscheduler - set/change the scheduler policy and RT priority * @pid: the pid in question. @@ -7389,6 +7399,8 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, rcu_read_unlock(); if (likely(p)) { + if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS) + get_params(p, &attr); retval = sched_setattr(p, &attr); put_task_struct(p); } @@ -7537,12 +7549,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, kattr.sched_policy = p->policy; if (p->sched_reset_on_fork) kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; - if (task_has_dl_policy(p)) - __getparam_dl(p, &kattr); - else if (task_has_rt_policy(p)) - kattr.sched_priority = p->rt_priority; - else - kattr.sched_nice = task_nice(p); + get_params(p, &kattr); kattr.sched_flags &= SCHED_FLAG_ALL; #ifdef CONFIG_UCLAMP_TASK