From: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>

Parameters:
period   - interval between refills (100ms should be fine)
quota    - events refill per period
deadline - interval to utilize unused past quota (1s by default)
latency  - maximum injected delay (10s by default)

Quota sums into 'budget' and spreads across cpus.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
 include/linux/percpu_ratelimit.h |   63 +++++++++++
 lib/Makefile                     |    1 
 lib/percpu_ratelimit.c           |  224 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 288 insertions(+)
 create mode 100644 include/linux/percpu_ratelimit.h
 create mode 100644 lib/percpu_ratelimit.c

diff --git a/include/linux/percpu_ratelimit.h b/include/linux/percpu_ratelimit.h
new file mode 100644
index 0000000..42c45d4
--- /dev/null
+++ b/include/linux/percpu_ratelimit.h
@@ -0,0 +1,63 @@
+#ifndef _LINUX_PERCPU_RATELIMIT_H
+#define _LINUX_PERCPU_RATELIMIT_H
+
+#include <linux/gfp.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/*
+ * Rate limiter with a shared budget that is handed out to CPUs in batches.
+ *
+ * State shared between CPUs is updated under @lock; the per-cpu precharge
+ * is written only by its owning CPU.
+ */
+struct percpu_ratelimit {
+	struct hrtimer	timer;
+	ktime_t		target;		/* time of next refill */
+	ktime_t		deadline;	/* interval to utilize past budget */
+	ktime_t		latency;	/* maximum injected delay */
+	ktime_t		period;		/* interval between refills */
+	u64		quota;		/* events refill per period */
+	u64		budget;		/* amount of available events */
+	u64		total;		/* consumed and pre-charged events */
+	raw_spinlock_t	lock;		/* protect the state */
+	u32		cpu_batch;	/* events in per-cpu precharge */
+	u32 __percpu	*cpu_budget;	/* per-cpu precharge */
+};
+
+/* Returns true while the unblock timer is active. */
+static inline bool percpu_ratelimit_blocked(struct percpu_ratelimit *rl)
+{
+	return hrtimer_active(&rl->timer);
+}
+
+/* Returns the time of the next refill (sampled without taking the lock). */
+static inline ktime_t percpu_ratelimit_target(struct percpu_ratelimit *rl)
+{
+	return rl->target;
+}
+
+/*
+ * Sleeps until the next refill time, with one period of timer slack.
+ * The caller is expected to set the task state first, see
+ * schedule_hrtimeout_range().
+ */
+static inline int percpu_ratelimit_wait(struct percpu_ratelimit *rl)
+{
+	ktime_t target = rl->target;
+
+	return schedule_hrtimeout_range(&target, ktime_to_ns(rl->period),
+					HRTIMER_MODE_ABS);
+}
+
+int percpu_ratelimit_init(struct percpu_ratelimit *rl, gfp_t gfp);
+void percpu_ratelimit_destroy(struct percpu_ratelimit *rl);
+void percpu_ratelimit_setup(struct percpu_ratelimit *rl, u64 quota, u64 period);
+u64 percpu_ratelimit_quota(struct percpu_ratelimit *rl, u64 period);
+bool percpu_ratelimit_charge(struct percpu_ratelimit *rl, u64 events);
+u64 percpu_ratelimit_sum(struct percpu_ratelimit *rl);
+
+#endif /* _LINUX_PERCPU_RATELIMIT_H */
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b..b20ab47 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,6 +21,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o klist.o
 obj-y	+= lockref.o
+obj-y	+= percpu_ratelimit.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/percpu_ratelimit.c b/lib/percpu_ratelimit.c
new file mode 100644
index 0000000..8254683
--- /dev/null
+++ b/lib/percpu_ratelimit.c
@@ -0,0 +1,224 @@
+#include <linux/percpu_ratelimit.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+
+/*
+ * Installs a new period and quota and resets the shared budget to one quota.
+ * Apart from initialization, the caller holds rl->lock.
+ */
+static void __percpu_ratelimit_setup(struct percpu_ratelimit *rl,
+				     u64 period, u64 quota)
+{
+	rl->period = ns_to_ktime(period);
+	rl->quota = quota;
+	/* Keep total - budget, the count of consumed events, unchanged. */
+	rl->total += quota - rl->budget;
+	rl->budget = quota;
+	/* Per-cpu batch: quota / (2 * cpus), rounded up and capped to u32. */
+	if (do_div(quota, num_possible_cpus() * 2))
+		quota++;
+	rl->cpu_batch = min_t(u64, UINT_MAX, quota);
+	rl->target = ktime_get();
+}
+
+/*
+ * Timer callback: re-arms the timer while the refill time is still in the
+ * future, otherwise lets it expire so that the limiter becomes unblocked.
+ */
+static enum hrtimer_restart ratelimit_unblock(struct hrtimer *t)
+{
+	struct percpu_ratelimit *rl = container_of(t, struct percpu_ratelimit,
+						   timer);
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
+	ktime_t now = t->base->get_time();
+
+	raw_spin_lock(&rl->lock);
+	if (ktime_after(rl->target, now)) {
+		hrtimer_set_expires_range(t, rl->target, rl->period);
+		ret = HRTIMER_RESTART;
+	}
+	raw_spin_unlock(&rl->lock);
+
+	return ret;
+}
+
+/*
+ * Initializes @rl with no limit configured (quota of ULLONG_MAX per second).
+ * Returns 0 on success, -ENOMEM if the per-cpu precharge allocation fails.
+ */
+int percpu_ratelimit_init(struct percpu_ratelimit *rl, gfp_t gfp)
+{
+	memset(rl, 0, sizeof(*rl));
+	rl->cpu_budget = alloc_percpu_gfp(typeof(*rl->cpu_budget), gfp);
+	if (!rl->cpu_budget)
+		return -ENOMEM;
+	raw_spin_lock_init(&rl->lock);
+	hrtimer_init(&rl->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	rl->timer.function = ratelimit_unblock;
+	rl->deadline = ns_to_ktime(NSEC_PER_SEC);
+	/* 10s in ns does not fit a 32-bit long, so multiply in 64 bits. */
+	rl->latency = ns_to_ktime(10ULL * NSEC_PER_SEC);
+	__percpu_ratelimit_setup(rl, NSEC_PER_SEC, ULLONG_MAX);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_init);
+
+/* Cancels the unblock timer and frees the per-cpu precharge. */
+void percpu_ratelimit_destroy(struct percpu_ratelimit *rl)
+{
+	hrtimer_cancel(&rl->timer);
+	free_percpu(rl->cpu_budget);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_destroy);
+
+/* Runs on every CPU after a reconfiguration: drops the local precharge. */
+static void percpu_ratelimit_drain(void *info)
+{
+	struct percpu_ratelimit *rl = info;
+
+	__this_cpu_write(*rl->cpu_budget, 0);
+}
+
+/*
+ * Sets the limit to @quota events per @period nanoseconds; a zero @quota or
+ * @period removes the limit. Periods longer than 100ms are rescaled to 100ms
+ * when that still leaves more than 20 events per period.
+ */
+void percpu_ratelimit_setup(struct percpu_ratelimit *rl, u64 quota, u64 period)
+{
+	const u64 quant_ns = NSEC_PER_SEC / 10;
+	unsigned long flags;
+
+	if (!quota || !period) {
+		quota = ULLONG_MAX;
+		period = NSEC_PER_SEC;
+	} else if (period > quant_ns &&
+		   quota <= div_u64(ULLONG_MAX, quant_ns)) {
+		/*
+		 * @period is 64 bits wide, so it needs div64_u64(): div_u64()
+		 * takes a 32-bit divisor. Quotas that cannot be multiplied by
+		 * quant_ns without overflow keep their original period.
+		 */
+		u64 quant = div64_u64(quota * quant_ns, period);
+
+		if (quant > 20) {
+			quota = quant;
+			period = quant_ns;
+		}
+	}
+
+	raw_spin_lock_irqsave(&rl->lock, flags);
+	__percpu_ratelimit_setup(rl, period, quota);
+	raw_spin_unlock_irqrestore(&rl->lock, flags);
+	on_each_cpu(percpu_ratelimit_drain, rl, 1);
+	hrtimer_cancel(&rl->timer);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_setup);
+
+/*
+ * Returns the configured limit scaled to @period nanoseconds, or 0 when no
+ * limit is set.
+ */
+u64 percpu_ratelimit_quota(struct percpu_ratelimit *rl, u64 period)
+{
+	unsigned long flags;
+	u64 quota;
+
+	raw_spin_lock_irqsave(&rl->lock, flags);
+	/* NOTE: rl->quota * period below is not checked for u64 overflow. */
+	if (rl->quota == ULLONG_MAX)
+		quota = 0;
+	else
+		quota = div64_u64(rl->quota * period, ktime_to_ns(rl->period));
+	raw_spin_unlock_irqrestore(&rl->lock, flags);
+
+	return quota;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_quota);
+
+/*
+ * Charges events, returns true if ratelimit is blocked and caller should sleep.
+ */
+bool percpu_ratelimit_charge(struct percpu_ratelimit *rl, u64 events)
+{
+	unsigned long flags;
+	u64 budget, delta, rem;
+	ktime_t now, deadline;
+
+	preempt_disable();
+	budget = __this_cpu_read(*rl->cpu_budget);
+	if (likely(budget >= events)) {
+		/* Fast path: the local precharge covers the request. */
+		__this_cpu_sub(*rl->cpu_budget, events);
+	} else {
+		now = ktime_get();
+		raw_spin_lock_irqsave(&rl->lock, flags);
+		/* Quota left unused for longer than rl->deadline is dropped. */
+		deadline = ktime_sub(now, rl->deadline);
+		if (ktime_after(deadline, rl->target))
+			rl->target = deadline;
+		budget += rl->budget;
+		if (budget >= events + rl->cpu_batch) {
+			budget -= events;
+		} else {
+			/*
+			 * Borrow whole quotas from the future, rounding up.
+			 * rl->quota is 64 bits wide, so do_div() with its
+			 * 32-bit divisor cannot be used here.
+			 */
+			delta = div64_u64_rem(events + rl->cpu_batch - budget,
+					      rl->quota, &rem);
+			if (rem)
+				delta++;
+			rl->target = ktime_add_ns(rl->target,
+					ktime_to_ns(rl->period) * delta);
+			/* Never push the refill beyond now + rl->latency. */
+			deadline = ktime_add(now, rl->latency);
+			if (ktime_after(rl->target, deadline))
+				rl->target = deadline;
+			delta *= rl->quota;
+			rl->total += delta;
+			budget += delta - events;
+		}
+		/* Precharge this CPU with a batch, the rest stays shared. */
+		rl->budget = budget - rl->cpu_batch;
+		__this_cpu_write(*rl->cpu_budget, rl->cpu_batch);
+		if (!hrtimer_active(&rl->timer) && ktime_after(rl->target, now))
+			hrtimer_start_range_ns(&rl->timer, rl->target,
+					       ktime_to_ns(rl->period),
+					       HRTIMER_MODE_ABS);
+		raw_spin_unlock_irqrestore(&rl->lock, flags);
+	}
+	preempt_enable();
+
+	return percpu_ratelimit_blocked(rl);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_charge);
+
+/*
+ * Returns count of consumed events.
+ */
+u64 percpu_ratelimit_sum(struct percpu_ratelimit *rl)
+{
+	unsigned long flags;
+	int cpu;
+	u64 ret;
+
+	raw_spin_lock_irqsave(&rl->lock, flags);
+	ret = rl->total - rl->budget;
+	/*
+	 * Walk all possible CPUs: nothing drains the precharge of a CPU
+	 * that goes offline, so it still holds unconsumed events.
+	 */
+	for_each_possible_cpu(cpu)
+		ret -= *per_cpu_ptr(rl->cpu_budget, cpu);
+	raw_spin_unlock_irqrestore(&rl->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_sum);
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html