When the kernel is about to handle pending softirqs, it first checks
whether softirqs have already used up their CPU bandwidth. Once the total
duration of softirq handling exceeds the maximum allowed within the
user-specified time window, softirqs are throttled for a while; the
throttling is removed when the time window expires.

On the other hand, the kernel updates the softirq runtime of the given
CPU before __do_softirq() returns.

Signed-off-by: Liao Chang <liaochang1@xxxxxxxxxx>
---
 kernel/softirq.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8aac9e2631fd..6de6db794ac5 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -75,6 +75,69 @@ struct softirq_throttle {
 	raw_spinlock_t lock;
 } si_throttle;
 
+/*
+ * Per-CPU softirq bandwidth accounting for the current throttle window.
+ * Fields are updated under @lock.
+ */
+struct softirq_runtime {
+	bool throttled;		/* softirqs are throttled on this CPU */
+	unsigned long duration;	/* softirq time used in this window, ms */
+	unsigned long expires;	/* end of this window, in jiffies */
+	raw_spinlock_t lock;
+};
+static DEFINE_PER_CPU(struct softirq_runtime, softirq_runtime);
+
+/*
+ * Start a new accounting window: clear the throttled state, reset the
+ * consumed time and move the expiry forward from the current jiffies.
+ */
+static void forward_softirq_expires(struct softirq_runtime *si_runtime)
+{
+	si_runtime->throttled = false;
+	si_runtime->duration = 0UL;
+	si_runtime->expires = jiffies +
+		msecs_to_jiffies(si_throttle.period - si_throttle.runtime);
+}
+
+/*
+ * Charge @duration (in jiffies) of softirq handling to this CPU and mark
+ * it throttled once the budget is used up before the window expires.
+ */
+static void update_softirq_runtime(unsigned long duration)
+{
+	struct softirq_runtime *si_runtime = this_cpu_ptr(&softirq_runtime);
+
+	raw_spin_lock(&si_runtime->lock);
+	si_runtime->duration += jiffies_to_msecs(duration);
+	if ((si_runtime->duration >= si_throttle.runtime) &&
+	    time_before(jiffies, si_runtime->expires)) {
+		si_runtime->throttled = true;
+	}
+	raw_spin_unlock(&si_runtime->lock);
+}
+
+/*
+ * Return true if softirq handling on this CPU is currently throttled.
+ * An expired window is restarted first, which lifts the throttling.
+ */
+static bool softirq_runtime_exceeded(void)
+{
+	struct softirq_runtime *si_runtime = this_cpu_ptr(&softirq_runtime);
+	bool throttled;
+
+	if ((unsigned int)si_throttle.runtime >= si_throttle.period)
+		return false;
+
+	raw_spin_lock(&si_runtime->lock);
+	if (!time_before(jiffies, si_runtime->expires))
+		forward_softirq_expires(si_runtime);
+	/* Read under the lock: softirq_throttle_update() may reset it. */
+	throttled = si_runtime->throttled;
+	raw_spin_unlock(&si_runtime->lock);
+
+	return throttled;
+}
+
 static int softirq_throttle_validate(void)
 {
 	if (((int)sysctl_softirq_period_ms <= 0) ||
@@ -88,10 +151,20 @@ static int softirq_throttle_validate(void)
 static void softirq_throttle_update(void)
 {
 	unsigned long flags;
+	struct softirq_runtime *si_runtime;
+	int cpu;
 
 	raw_spin_lock_irqsave(&si_throttle.lock, flags);
 	si_throttle.period = sysctl_softirq_period_ms;
 	si_throttle.runtime = sysctl_softirq_runtime_ms;
+
+	/* Restart the window on every CPU so the new limits apply at once. */
+	for_each_possible_cpu(cpu) {
+		si_runtime = per_cpu_ptr(&softirq_runtime, cpu);
+		raw_spin_lock(&si_runtime->lock);
+		forward_softirq_expires(si_runtime);
+		raw_spin_unlock(&si_runtime->lock);
+	}
 	raw_spin_unlock_irqrestore(&si_throttle.lock, flags);
 }
 
@@ -129,9 +202,18 @@ int softirq_throttle_handler(struct ctl_table *table, int write, void *buffer,
 
 static void softirq_throttle_init(void)
 {
+	struct softirq_runtime *si_runtime;
+	int cpu;
+
 	si_throttle.period = sysctl_softirq_period_ms;
 	si_throttle.runtime = sysctl_softirq_runtime_ms;
 	raw_spin_lock_init(&si_throttle.lock);
+
+	for_each_possible_cpu(cpu) {
+		si_runtime = per_cpu_ptr(&softirq_runtime, cpu);
+		raw_spin_lock_init(&si_runtime->lock);
+		forward_softirq_expires(si_runtime);
+	}
 }
 #endif
 
@@ -592,6 +674,17 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	__u32 pending;
 	int softirq_bit;
 
+#ifdef CONFIG_SOFTIRQ_THROTTLE
+	bool exceeded = softirq_runtime_exceeded();
+
+	/*
+	 * Budget used up: handle nothing in this invocation.
+	 * NOTE(review): pending softirqs stay raised; confirm what retries.
+	 */
+	if (exceeded)
+		return;
+#endif
+
 	/*
 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
 	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
@@ -652,6 +745,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 		wakeup_softirqd();
 	}
 
+#ifdef CONFIG_SOFTIRQ_THROTTLE
+	/* end - MAX_SOFTIRQ_TIME is taken as the entry time in jiffies. */
+	update_softirq_runtime(jiffies - (end - MAX_SOFTIRQ_TIME));
+#endif
+
 	account_softirq_exit(current);
 	lockdep_softirq_end(in_hardirq);
 	softirq_handle_end();
-- 
2.17.1