On 8/1/22 19:11, Sasha Algisi wrote: > Tasks associated to virtual CPUs, IO Threads and Emulator processes > can be created with the SCHED_DEADLINE policy. The policy is described > in details here: https://docs.kernel.org/scheduler/sched-deadline.html > > It requires the following parameters (all in nanoseconds): > 1) runtime > 2) deadline > 3) period > > It must always holds that: runtime <= deadline <= period. > > The kernel enforces that the values stay within [1024, 2^63-1]. > Note, however, that a smaller range could be set (or be already set > by default) via sysctl (see kernel.sched_deadline_period_max_us and > kernel.sched_deadline_period_min_us). > > All the three parameters are mandatory but period can be set to 0, > in which case it will set to the same value of deadline. > > Signed-off-by: Sasha Algisi <sasha.algisi@xxxxxxxxxxxx> > Signed-off-by: Dario Faggioli <dfaggioli@xxxxxxxx> > --- > src/ch/ch_process.c | 3 +- > src/conf/domain_conf.h | 3 ++ > src/qemu/qemu_process.c | 8 +++- > src/util/virprocess.c | 84 +++++++++++++++++++++++++++++++++++++++-- > src/util/virprocess.h | 6 ++- > 5 files changed, 97 insertions(+), 7 deletions(-) > > diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c > index 77f55e777b..a40d188aac 100644 > --- a/src/ch/ch_process.c > +++ b/src/ch/ch_process.c > @@ -293,7 +293,8 @@ virCHProcessSetupPid(virDomainObj *vm, > /* Set scheduler type and priority, but not for the main thread. */ > if (sched && > nameval != VIR_CGROUP_THREAD_EMULATOR && > - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) > + virProcessSetScheduler(pid, sched->policy, sched->priority, > + sched->runtime, sched->deadline, sched->period) < 0) > goto cleanup; > > ret = 0; > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index 060c395943..c3d1a1b65d 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -2454,6 +2454,9 @@ typedef enum { > struct _virDomainThreadSchedParam { > virProcessSchedPolicy policy; > int priority; > + uint64_t runtime; > + uint64_t deadline; > + uint64_t period; Or just unsigned long long. Here we don't face kernel just yet, and can use 'more generic' types which also allows you to drop plenty of typecasts when using internal helpers. > }; > > struct _virDomainTimerCatchupDef { > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index 137dcf5cf4..7586e0538a 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -2728,7 +2728,8 @@ qemuProcessSetupPid(virDomainObj *vm, > /* Set scheduler type and priority, but not for the main thread. */ > if (sched && > nameval != VIR_CGROUP_THREAD_EMULATOR && > - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) > + virProcessSetScheduler(pid, sched->policy, sched->priority, > + sched->runtime, sched->deadline, sched->period) < 0) > goto cleanup; > > ret = 0; > @@ -7813,7 +7814,10 @@ qemuProcessLaunch(virConnectPtr conn, > if (vm->def->cputune.emulatorsched && > virProcessSetScheduler(vm->pid, > vm->def->cputune.emulatorsched->policy, > - vm->def->cputune.emulatorsched->priority) < 0) > + vm->def->cputune.emulatorsched->priority, > + vm->def->cputune.emulatorsched->runtime, > + vm->def->cputune.emulatorsched->deadline, > + vm->def->cputune.emulatorsched->period) < 0) > goto cleanup; > > VIR_DEBUG("Setting any required VM passwords"); > diff --git a/src/util/virprocess.c b/src/util/virprocess.c > index a8f86784e1..c96bfc45fd 100644 > --- a/src/util/virprocess.c > +++ b/src/util/virprocess.c > @@ -84,6 +84,7 @@ VIR_ENUM_IMPL(virProcessSchedPolicy, > "idle", > "fifo", > "rr", > + "deadline", > ); > > > @@ -1610,6 +1611,13 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) > case VIR_PROC_POLICY_RR: > return SCHED_RR; > > + case VIR_PROC_POLICY_DEADLINE: > +# ifdef SCHED_DEADLINE > + return SCHED_DEADLINE; > +# else > + return -1; > +# endif > + > case VIR_PROC_POLICY_LAST: > /* nada */ > break; > @@ -1621,13 +1629,20 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) > int > virProcessSetScheduler(pid_t pid, > virProcessSchedPolicy policy, > - int priority) > + int priority, > + uint64_t runtime G_GNUC_UNUSED, > + uint64_t deadline G_GNUC_UNUSED, > + uint64_t period G_GNUC_UNUSED) The !WITH_SCHED_SETSCHEDULER case stub is not updated correspondingly. And these new arguments don't need the unused annotation ... > { > struct sched_param param = {0}; > int pol = virProcessSchedTranslatePolicy(policy); > > - VIR_DEBUG("pid=%lld, policy=%d, priority=%u", > - (long long) pid, policy, priority); > + VIR_DEBUG("pid=%lld, policy=%d, priority=%u, " > + "runtime=%llu, deadline=%llu, period=%llu", > + (long long) pid, policy, priority, > + (unsigned long long) runtime, > + (unsigned long long) deadline, > + (unsigned long long) period); .. as they are used here. Also, arguments can be ull type because it's only later in this function that we need to convert them to uint64_t. > > if (!policy) > return 0; > @@ -1667,6 +1682,69 @@ virProcessSetScheduler(pid_t pid, > param.sched_priority = priority; > } > > +# ifdef SCHED_DEADLINE > + if (pol == SCHED_DEADLINE) { > + struct sched_attr attr = {0}; > + /* > + * The range is enforced in the kernel. > + * See: https://man7.org/linux/man-pages/man7/sched.7.html > + */ > + uint64_t min_value = 1024; > + uint64_t max_value = (1ULL << 63) - 1; > + > + if (runtime < min_value || runtime > max_value) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Scheduler runtime %llu out of range " > + "[%llu, %llu]"), > + (unsigned long long) runtime, > + (unsigned long long) min_value, > + (unsigned long long) max_value); > + return -1; > + } > + > + if (deadline < runtime || deadline > max_value) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Scheduler deadline %llu out of range " > + "[%llu, %llu]"), > + (unsigned long long) deadline, > + (unsigned long long) runtime, > + (unsigned long long) max_value); > + return -1; > + } > + > + if ((period < deadline || period > max_value) && period != 0) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Invalid scheduler period %llu, " > + "possible correct values are 0 " > + "or [%llu, %llu]"), > + (unsigned long long) period, > + (unsigned long long) deadline, > + (unsigned long long) max_value); > + return -1; > + } > + > + attr.size = sizeof(attr); > + attr.sched_policy = pol; > + /* > + * Setting reset-on-fork is necessary as SCHED_DEADLINE > + * tasks cannot fork. See: > + * https://docs.kernel.org/scheduler/sched-deadline.html#default-behavior > + */ > + attr.sched_flags = SCHED_FLAG_RESET_ON_FORK; > + attr.sched_runtime = runtime; > + attr.sched_deadline = deadline; > + attr.sched_period = period; > + > + if (sched_setattr(pid, &attr, 0) == 0) { > + return 0; > + } else { > + virReportSystemError(errno, > + _("Cannot set scheduler parameters for pid %lld"), > + (long long) pid); > + return -1; > + } > + } > +# endif > if (sched_setscheduler(pid, pol, ¶m) < 0) { So what happens when the deadline policy is requested (e.g. virProcessSetScheduler(policy=VIR_PROC_POLICY_DEADLINE) but SCHED_DEADLINE is not available? Does this sched_setscheduler() here error out? > virReportSystemError(errno, > _("Cannot set scheduler parameters for pid %lld"), > diff --git a/src/util/virprocess.h b/src/util/virprocess.h > index 30b6981c73..84fdf00fdf 100644 > --- a/src/util/virprocess.h > +++ b/src/util/virprocess.h > @@ -33,6 +33,7 @@ typedef enum { > VIR_PROC_POLICY_IDLE, > VIR_PROC_POLICY_FIFO, > VIR_PROC_POLICY_RR, > + VIR_PROC_POLICY_DEADLINE, This fails to compile, because of a switch() inside virDomainSchedulerFormat() that does not handle this new case. I think you can work around this by reordering things a bit. For instance, the first patch can introduce XML parsing/formatting and this new enum member. Then, follow up patch(-es) can extend virProcessSetScheduler. > > VIR_PROC_POLICY_LAST > } virProcessSchedPolicy; > @@ -116,7 +117,10 @@ int virProcessSetupPrivateMountNS(void); > > int virProcessSetScheduler(pid_t pid, > virProcessSchedPolicy policy, > - int priority); > + int priority, > + uint64_t runtime, > + uint64_t deadline, > + uint64_t period); > > GStrv virProcessGetStat(pid_t pid, pid_t tid); > Michal