/* Restart the period timer (if active) to handle new period expiry: */
- if (runtime_enabled)
+ if (runtime_enabled) {
+ __refill_cfs_bandwidth_runtime(cfs_b);
start_cfs_bandwidth(cfs_b);
+ }
raw_spin_unlock_irq(&cfs_b->lock);
@@ -7432,9 +7434,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
{
- u64 quota, period;
+ u64 quota, period, burst;
period = ktime_to_ns(tg->cfs_bandwidth.period);
+ burst = tg->cfs_bandwidth.burst;
if (cfs_quota_us < 0)
quota = RUNTIME_INF;
else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
@@ -7442,7 +7445,7 @@ static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
else
return -EINVAL;
- return tg_set_cfs_bandwidth(tg, period, quota);
+ return tg_set_cfs_bandwidth(tg, period, quota, burst);
}
static long tg_get_cfs_quota(struct task_group *tg)
@@ -7460,15 +7463,16 @@ static long tg_get_cfs_quota(struct task_group *tg)
static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
{
- u64 quota, period;
+ u64 quota, period, burst;
if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
return -EINVAL;
period = (u64)cfs_period_us * NSEC_PER_USEC;
quota = tg->cfs_bandwidth.quota;
+ burst = tg->cfs_bandwidth.burst;
- return tg_set_cfs_bandwidth(tg, period, quota);
+ return tg_set_cfs_bandwidth(tg, period, quota, burst);
}
static long tg_get_cfs_period(struct task_group *tg)
@@ -7481,6 +7485,28 @@ static long tg_get_cfs_period(struct task_group *tg)
return cfs_period_us;
}
+static long tg_get_cfs_burst(struct task_group *tg)
+{
+ u64 cfs_burst_us = tg->cfs_bandwidth.burst;
+
+ do_div(cfs_burst_us, NSEC_PER_USEC);
+ return cfs_burst_us;
+}
+
+static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us)
+{
+ u64 quota, period, burst;
+
+ if ((u64)cfs_burst_us > U64_MAX / NSEC_PER_USEC)
+ return -EINVAL;
+
+ period = ktime_to_ns(tg->cfs_bandwidth.period);
+ quota = tg->cfs_bandwidth.quota;
+ burst = (u64)cfs_burst_us * NSEC_PER_USEC;
+
+ return tg_set_cfs_bandwidth(tg, period, quota, burst);
+}
+
static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
struct cftype *cft)
{
@@ -7505,6 +7531,18 @@ static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
return tg_set_cfs_period(css_tg(css), cfs_period_us);
}
+static u64 cpu_cfs_burst_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return tg_get_cfs_burst(css_tg(css));
+}
+
+static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 cfs_burst_us)
+{
+ return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+}
+
struct cfs_schedulable_data {
struct task_group *tg;
u64 period, quota;
@@ -7596,6 +7634,7 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods);
seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
+ seq_printf(sf, "burst_time %llu\n", cfs_b->burst_time);
if (schedstat_enabled() && tg != &root_task_group) {
u64 ws = 0;
@@ -7657,6 +7696,11 @@ static struct cftype cpu_legacy_files[] = {
.read_u64 = cpu_cfs_period_read_u64,
.write_u64 = cpu_cfs_period_write_u64,
},
+ {
+ .name = "cfs_burst_us",
+ .read_u64 = cpu_cfs_burst_read_u64,
+ .write_u64 = cpu_cfs_burst_write_u64,
+ },
{
.name = "stat",
.seq_show = cpu_cfs_stat_show,
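For illustration, a minimal user-space sketch of driving the new cgroup v1 knob added above. The mount point and group name are assumptions, not part of this patch, and the write order keeps burst <= quota so any validation in the setter is satisfied:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_cg(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* hypothetical v1 hierarchy mounted at /sys/fs/cgroup/cpu */
	write_cg("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", "100000");
	write_cg("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", "100000");
	/* allow carrying up to 25ms of unused quota between periods */
	write_cg("/sys/fs/cgroup/cpu/demo/cpu.cfs_burst_us", "25000");
	return 0;
}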
@@ -7699,15 +7743,20 @@ static int cpu_extra_stat_show(struct seq_file *sf,
struct task_group *tg = css_tg(css);
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
u64 throttled_usec;
+ u64 burst_usec;
throttled_usec = cfs_b->throttled_time;
do_div(throttled_usec, NSEC_PER_USEC);
+ burst_usec = cfs_b->burst_time;
+ do_div(burst_usec, NSEC_PER_USEC);
+
seq_printf(sf, "nr_periods %d\n"
"nr_throttled %d\n"
- "throttled_usec %llu\n",
+ "throttled_usec %llu\n"
+ "burst_usec %llu\n",
cfs_b->nr_periods, cfs_b->nr_throttled,
- throttled_usec);
+ throttled_usec, burst_usec);
}
#endif
return 0;
@@ -7777,26 +7826,29 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
#endif
static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
- long period, long quota)
+ long period, long quota,
+ long burst)
{
if (quota < 0)
seq_puts(sf, "max");
else
seq_printf(sf, "%ld", quota);
- seq_printf(sf, " %ld\n", period);
+ seq_printf(sf, " %ld %ld\n", period, burst);
}
- /* caller should put the current value in *@periodp before calling */
+ /* caller should put current values in *@periodp and *@burstp before calling */
static int __maybe_unused cpu_period_quota_parse(char *buf,
- u64 *periodp, u64 *quotap)
+ u64 *periodp, u64 *quotap,
+ u64 *burstp)
{
char tok[21]; /* U64_MAX */
- if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
+ if (sscanf(buf, "%20s %llu %llu", tok, periodp, burstp) < 1)
return -EINVAL;
*periodp *= NSEC_PER_USEC;
+ *burstp *= NSEC_PER_USEC;
if (sscanf(tok, "%llu", quotap))
*quotap *= NSEC_PER_USEC;
@@ -7813,7 +7865,8 @@ static int cpu_max_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
- cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg),
+ tg_get_cfs_burst(tg));
return 0;
}
@@ -7822,12 +7875,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
{
struct task_group *tg = css_tg(of_css(of));
u64 period = tg_get_cfs_period(tg);
+ u64 burst = tg_get_cfs_burst(tg);
u64 quota;
int ret;
- ret = cpu_period_quota_parse(buf, &period, &quota);
+ ret = cpu_period_quota_parse(buf, &period, &quota, &burst);
if (!ret)
- ret = tg_set_cfs_bandwidth(tg, period, quota);
+ ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
return ret ?: nbytes;
}
#endif
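The cpu.max changes above make burst a trailing optional field: cpu_max_write() pre-seeds period and burst with the group's current values, and the sscanf() only fails when even the quota token is missing, so existing "$MAX $PERIOD" writers keep working. A hedged, stand-alone demonstration of that parse behavior (user space, not kernel code):

#include <stdio.h>

static void parse(const char *buf, unsigned long long period,
		  unsigned long long burst)
{
	char tok[21];	/* U64_MAX */

	/* fields absent from buf keep their caller-seeded values */
	if (sscanf(buf, "%20s %llu %llu", tok, &period, &burst) < 1) {
		printf("\"%s\" -> -EINVAL\n", buf);
		return;
	}
	printf("\"%s\" -> quota=%s period=%llu burst=%llu\n",
	       buf, tok, period, burst);
}

int main(void)
{
	/* callers pre-seed period/burst with the current settings */
	parse("max", 100000, 0);                /* unlimited quota */
	parse("50000 100000", 100000, 0);       /* old two-field form */
	parse("50000 100000 25000", 100000, 0); /* new three-field form */
	return 0;
}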
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 682a754ea3e1..26b8dac67c79 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4353,16 +4353,26 @@ static inline u64 sched_cfs_bandwidth_slice(void)
}
/*
- * Replenish runtime according to assigned quota. We use sched_clock_cpu
- * directly instead of rq->clock to avoid adding additional synchronization
- * around rq->lock.
+ * Replenish runtime according to assigned quota.
+ * Called only if quota != RUNTIME_INF.
*
* requires cfs_b->lock
*/
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
{
- if (cfs_b->quota != RUNTIME_INF)
- cfs_b->runtime = cfs_b->quota;
+ u64 runtime = cfs_b->runtime;
+
+ /*
+ * Preserve leftover runtime from the past period, up to the burst size.
+ * If the remaining runtime is lower than the previously preserved burst
+ * runtime, account the delta as consumed burst time.
+ */
+ if (runtime > cfs_b->burst)
+ runtime = cfs_b->burst;
+ else if (runtime < cfs_b->burst_runtime)
+ cfs_b->burst_time += cfs_b->burst_runtime - runtime;
+
+ cfs_b->burst_runtime = runtime;
+ cfs_b->runtime = runtime + cfs_b->quota;
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
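A worked example of the refill above, with illustrative numbers (quota = 100ms, burst = 50ms): a group that leaves 60ms unused carries min(60, 50) = 50ms forward and starts the next period with 150ms of runtime; if it then bursts through 140ms, only 10ms of the carried 50ms survives, so 40ms is added to the burst_time statistic. A runnable restatement of this arithmetic follows the start_cfs_bandwidth() hunk below.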
@@ -4968,6 +4978,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->runtime = 0;
cfs_b->quota = RUNTIME_INF;
cfs_b->period = ns_to_ktime(default_cfs_period());
+ cfs_b->burst = 0;
+ cfs_b->burst_runtime = 0;
+ cfs_b->burst_time = 0;
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
@@ -4986,14 +4999,23 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
+ u64 overrun;
+
lockdep_assert_held(&cfs_b->lock);
if (cfs_b->period_active)
return;
cfs_b->period_active = 1;
- hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+ overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+
+ /*
+ * Refill runtime for the current period and for any elapsed periods of
+ * inactivity; __refill_cfs_bandwidth_runtime() will trim the excess.
+ */
+ cfs_b->runtime += cfs_b->quota * overrun;
+ __refill_cfs_bandwidth_runtime(cfs_b);
}
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
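As promised above, a minimal self-contained user-space restatement of the two hunks' arithmetic. The struct, names, and millisecond units are illustrative assumptions, not kernel API; refill() mirrors __refill_cfs_bandwidth_runtime() and restart_timer() mirrors the quota-per-overrun top-up in start_cfs_bandwidth():

#include <stdio.h>

struct bw {
	unsigned long long quota, burst;   /* configuration */
	unsigned long long runtime;        /* remaining runtime */
	unsigned long long burst_runtime;  /* carried over at last refill */
	unsigned long long burst_time;     /* stats: consumed burst */
};

/* mirrors __refill_cfs_bandwidth_runtime() */
static void refill(struct bw *b)
{
	unsigned long long runtime = b->runtime;

	if (runtime > b->burst)
		runtime = b->burst;
	else if (runtime < b->burst_runtime)
		b->burst_time += b->burst_runtime - runtime;

	b->burst_runtime = runtime;
	b->runtime = runtime + b->quota;
}

/* mirrors the start_cfs_bandwidth() top-up for missed periods */
static void restart_timer(struct bw *b, unsigned long long overrun)
{
	b->runtime += b->quota * overrun;  /* one quota per missed period */
	refill(b);                         /* trims to quota + burst */
}

int main(void)
{
	struct bw b = { .quota = 100, .burst = 50 };

	refill(&b);            /* period 1 starts with 100 */
	b.runtime -= 40;       /* use 40, leave 60 */
	refill(&b);            /* carry min(60, 50): period 2 starts with 150 */
	b.runtime -= 140;      /* burst to 140, leave 10 */
	refill(&b);            /* 10 < 50 carried: burst_time += 40 */
	b.runtime -= 100;      /* use the full quota */
	restart_timer(&b, 3);  /* revived after 3 idle periods: capped at 150 */
	printf("runtime=%llu burst_time=%llu\n", b.runtime, b.burst_time);
	return 0;
}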
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..eaecd7298f80 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -344,10 +344,14 @@ struct cfs_bandwidth {
struct hrtimer slack_timer;
struct list_head throttled_cfs_rq;
+ /* Max amount of unused quota carried between periods: */
+ u64 burst;
+ /* Runtime carried over at the last refill, capped at burst: */
+ u64 burst_runtime;
+
/* Statistics: */
int nr_periods;
int nr_throttled;
u64 throttled_time;
+ u64 burst_time;
#endif
};